1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-10 01:48:00 +00:00

Relegate BignumDblInt to an implementation detail of sshbn.h.

As I mentioned in the previous commit, I'm going to want PuTTY to be
able to run sensibly when compiled with 64-bit Visual Studio,
including handling bignums in 64-bit chunks for speed. Unfortunately,
64-bit VS does not provide any type we can use as BignumDblInt in that
situation (unlike 64-bit gcc and clang, which give us __uint128_t).
The only facilities it provides are compiler intrinsics to access an
add-with-carry operation and a 64x64->128 multiplication (the latter
delivering its product in two separate 64-bit output chunks).

Hence, here's a substantial rework of the bignum code to make it
implement everything in terms of _those_ primitives, rather than
depending throughout on having BignumDblInt available to use ad-hoc.
BignumDblInt does still exist, for the moment, but now it's an
internal implementation detail of sshbn.h, only declared inside a new
set of macros implementing arithmetic primitives, and not accessible
to any code outside sshbn.h (which confirms that I really did catch
all uses of it and remove them).

The resulting code is surprisingly nice-looking, actually. You'd
expect more hassle and roundabout circumlocutions when you drop down
to using a more basic set of primitive operations, but actually, in
many cases it's turned out shorter to write things in terms of the new
BignumADC and BignumMUL macros - because almost all my uses of
BignumDblInt were implementing those operations anyway, taking several
lines at a time, and now they can do each thing in just one line.

The biggest headache was Poly1305: I wasn't able to find any sensible
way to adapt the existing Python script that generates the various
per-int-size implementations of arithmetic mod 2^130-5, and so I had
to rewrite it from scratch instead, with nothing in common with the
old version beyond a handful of comments. But even that seems to have
worked out nicely: the new version has much more legible descriptions
of the high-level algorithms, by virtue of having a 'Multiprecision'
type which wraps up the division into words, and yet Multiprecision's
range analysis allows it to automatically drop out special cases such
as multiplication by 5 being much easier than multiplication by
another multi-word integer.
This commit is contained in:
Simon Tatham 2015-12-16 14:12:26 +00:00
parent 482b4ab872
commit c2ec13c7e9
4 changed files with 1159 additions and 1209 deletions

View File

@ -1,108 +1,300 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys import sys
import string
from collections import namedtuple
class Output(object): class Multiprecision(object):
def __init__(self, bignum_int_bits): def __init__(self, target, minval, maxval, words):
self.bignum_int_bits = bignum_int_bits self.target = target
self.text = "" self.minval = minval
self.vars = [] self.maxval = maxval
def stmt(self, statement): self.words = words
self.text += " %s;\n" % statement assert 0 <= self.minval
def register_var(self, var): assert self.minval <= self.maxval
self.vars.append(var) assert self.target.nwords(self.maxval) == len(words)
def finalise(self):
for var in self.vars:
assert var.maxval == 0, "Variable not clear: %s" % var.name
return self.text
class Variable(object): def getword(self, n):
def __init__(self, out, name): return self.words[n] if n < len(self.words) else "0"
self.out = out
self.maxval = 0
self.name = name
self.placeval = None
self.out.stmt("BignumDblInt %s" % (self.name))
self.out.register_var(self)
def clear(self, placeval):
self.maxval = 0
self.placeval = placeval
self.out.stmt("%s = 0" % (self.name))
def set_word(self, name, limit=None):
if limit is not None:
self.maxval = limit-1
else:
self.maxval = (1 << self.out.bignum_int_bits) - 1
assert self.maxval < (1 << 2*self.out.bignum_int_bits)
self.out.stmt("%s = %s" % (self.name, name))
def add_word(self, name, limit=None):
if limit is not None:
self.maxval += limit-1
else:
self.maxval += (1 << self.out.bignum_int_bits) - 1
assert self.maxval < (1 << 2*self.out.bignum_int_bits)
self.out.stmt("%s += %s" % (self.name, name))
def add_input_word(self, fmt, wordpos, limit=None):
assert self.placeval == wordpos * self.out.bignum_int_bits
self.add_word(fmt % wordpos, limit)
def set_to_product(self, a, b, placeval):
self.maxval = ((1 << self.out.bignum_int_bits) - 1) ** 2
assert self.maxval < (1 << 2*self.out.bignum_int_bits)
self.out.stmt("%s = (BignumDblInt)(%s) * (%s)" % (self.name, a, b))
self.placeval = placeval
def add_bottom_half(self, srcvar):
self.add_word("%s & BIGNUM_INT_MASK" % (srcvar.name))
def add_top_half(self, srcvar):
self.add_word("%s >> %d" % (srcvar.name, self.out.bignum_int_bits))
def unload_into(self, topvar, botvar):
assert botvar.placeval == self.placeval
botvar.add_bottom_half(self)
assert topvar.placeval == self.placeval + self.out.bignum_int_bits
topvar.add_top_half(self)
self.maxval = 0
def output_word(self, bitpos, bits, destfmt, destwordpos):
assert bitpos == 0
assert self.placeval == destwordpos * self.out.bignum_int_bits
dest = destfmt % destwordpos
if bits == self.out.bignum_int_bits:
self.out.stmt("%s = %s" % (dest, self.name))
else:
self.out.stmt("%s = %s & (((BignumInt)1 << %d)-1)" %
(dest, self.name, bits))
def transfer_to_next_acc(self, bitpos, bits, pow5, destvar):
destbitpos = self.placeval + bitpos - 130 * pow5 - destvar.placeval
#print "transfer", "*%d" % 5**pow5, self.name, self.placeval, bitpos, destvar.name, destvar.placeval, destbitpos, bits
assert 0 <= bitpos < bitpos+bits <= self.out.bignum_int_bits
assert 0 <= destbitpos < destbitpos+bits <= self.out.bignum_int_bits
expr = self.name
if bitpos > 0:
expr = "(%s >> %d)" % (expr, bitpos)
expr = "(%s & (((BignumInt)1 << %d)-1))" % (expr, bits)
self.out.stmt("%s += %s * ((BignumDblInt)%d << %d)" %
(destvar.name, expr, 5**pow5, destbitpos))
destvar.maxval += (((1 << bits)-1) << destbitpos) * (5**pow5)
def shift_down_from(self, top):
if top is not None:
self.out.stmt("%s = %s + (%s >> %d)" %
(self.name, top.name, self.name,
self.out.bignum_int_bits))
topmaxval = top.maxval
else:
self.out.stmt("%s >>= %d" % (self.name, self.out.bignum_int_bits))
topmaxval = 0
self.maxval = topmaxval + self.maxval >> self.out.bignum_int_bits
assert self.maxval < (1 << 2*self.out.bignum_int_bits)
if top is not None:
assert self.placeval + self.out.bignum_int_bits == top.placeval
top.clear(top.placeval + self.out.bignum_int_bits)
self.placeval += self.out.bignum_int_bits
def gen_add(bignum_int_bits): def __add__(self, rhs):
out = Output(bignum_int_bits) newmin = self.minval + rhs.minval
newmax = self.maxval + rhs.maxval
nwords = self.target.nwords(newmax)
words = []
inbits = 130 addfn = self.target.add
inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits for i in range(nwords):
words.append(addfn(self.getword(i), rhs.getword(i)))
addfn = self.target.adc
return Multiprecision(self.target, newmin, newmax, words)
def __mul__(self, rhs):
newmin = self.minval * rhs.minval
newmax = self.maxval * rhs.maxval
nwords = self.target.nwords(newmax)
words = []
# There are basically two strategies we could take for
# multiplying two multiprecision integers. One is to enumerate
# the space of pairs of word indices in lexicographic order,
# essentially computing a*b[i] for each i and adding them
# together; the other is to enumerate in diagonal order,
# computing everything together that belongs at a particular
# output word index.
#
# For the moment, I've gone for the former.
sprev = []
for i, sword in enumerate(self.words):
rprev = None
sthis = sprev[:i]
for j, rword in enumerate(rhs.words):
prevwords = []
if i+j < len(sprev):
prevwords.append(sprev[i+j])
if rprev is not None:
prevwords.append(rprev)
vhi, vlo = self.target.muladd(sword, rword, *prevwords)
sthis.append(vlo)
rprev = vhi
sthis.append(rprev)
sprev = sthis
# Remove unneeded words from the top of the output, if we can
# prove by range analysis that they'll always be zero.
sprev = sprev[:self.target.nwords(newmax)]
return Multiprecision(self.target, newmin, newmax, sprev)
def extract_bits(self, start, bits=None):
if bits is None:
bits = (self.maxval >> start).bit_length()
# Overly thorough range analysis: if min and max have the same
# *quotient* by 2^bits, then the result of reducing anything
# in the range [min,max] mod 2^bits has to fall within the
# obvious range. But if they have different quotients, then
# you can wrap round the modulus and so any value mod 2^bits
# is possible.
newmin = self.minval >> start
newmax = self.maxval >> start
if (newmin >> bits) != (newmax >> bits):
newmin = 0
newmax = (1 << bits) - 1
nwords = self.target.nwords(newmax)
words = []
for i in range(nwords):
srcpos = i * self.target.bits + start
maxbits = min(self.target.bits, start + bits - srcpos)
wordindex = srcpos / self.target.bits
if srcpos % self.target.bits == 0:
word = self.getword(srcpos / self.target.bits)
elif (wordindex+1 >= len(self.words) or
srcpos % self.target.bits + maxbits < self.target.bits):
word = self.target.new_value(
"(%%s) >> %d" % (srcpos % self.target.bits),
self.getword(srcpos / self.target.bits))
else:
word = self.target.new_value(
"((%%s) >> %d) | ((%%s) << %d)" % (
srcpos % self.target.bits,
self.target.bits - (srcpos % self.target.bits)),
self.getword(srcpos / self.target.bits),
self.getword(srcpos / self.target.bits + 1))
if maxbits < self.target.bits and maxbits < bits:
word = self.target.new_value(
"(%%s) & ((((BignumInt)1) << %d)-1)" % maxbits,
word)
words.append(word)
return Multiprecision(self.target, newmin, newmax, words)
# Each Statement has a list of variables it reads, and a list of ones
# it writes. 'forms' is a list of multiple actual C statements it
# could be generated as, depending on which of its output variables is
# actually used (e.g. no point calling BignumADC if the generated
# carry in a particular case is unused, or BignumMUL if nobody needs
# the top half). It is indexed by a bitmap whose bits correspond to
# the entries in wvars, with wvars[0] the MSB and wvars[-1] the LSB.
Statement = namedtuple("Statement", "rvars wvars forms")
class CodegenTarget(object):
def __init__(self, bits):
self.bits = bits
self.valindex = 0
self.stmts = []
self.generators = {}
self.bv_words = (130 + self.bits - 1) / self.bits
self.carry_index = 0
def nwords(self, maxval):
return (maxval.bit_length() + self.bits - 1) / self.bits
def stmt(self, stmt, needed=False):
index = len(self.stmts)
self.stmts.append([needed, stmt])
for val in stmt.wvars:
self.generators[val] = index
def new_value(self, formatstr=None, *deps):
name = "v%d" % self.valindex
self.valindex += 1
if formatstr is not None:
self.stmt(Statement(
rvars=deps, wvars=[name],
forms=[None, name + " = " + formatstr % deps]))
return name
def bigval_input(self, name, bits):
words = (bits + self.bits - 1) / self.bits
# Expect not to require an entire extra word
assert words == self.bv_words
return Multiprecision(self, 0, (1<<bits)-1, [
self.new_value("%s->w[%d]" % (name, i)) for i in range(words)])
def const(self, value):
# We only support constants small enough to both fit in a
# BignumInt (of any size supported) _and_ be expressible in C
# with no weird integer literal syntax like a trailing LL.
#
# Supporting larger constants would be possible - you could
# break 'value' up into word-sized pieces on the Python side,
# and generate a legal C expression for each piece by
# splitting it further into pieces within the
# standards-guaranteed 'unsigned long' limit of 32 bits and
# then casting those to BignumInt before combining them with
# shifts. But it would be a lot of effort, and since the
# application for this code doesn't even need it, there's no
# point in bothering.
assert value < 2**16
return Multiprecision(self, value, value, ["%d" % value])
def current_carry(self):
return "carry%d" % self.carry_index
def add(self, a1, a2):
ret = self.new_value()
adcform = "BignumADC(%s, carry, %s, %s, 0)" % (ret, a1, a2)
plainform = "%s = %s + %s" % (ret, a1, a2)
self.carry_index += 1
carryout = self.current_carry()
self.stmt(Statement(
rvars=[a1,a2], wvars=[ret,carryout],
forms=[None, adcform, plainform, adcform]))
return ret
def adc(self, a1, a2):
ret = self.new_value()
adcform = "BignumADC(%s, carry, %s, %s, carry)" % (ret, a1, a2)
plainform = "%s = %s + %s + carry" % (ret, a1, a2)
carryin = self.current_carry()
self.carry_index += 1
carryout = self.current_carry()
self.stmt(Statement(
rvars=[a1,a2,carryin], wvars=[ret,carryout],
forms=[None, adcform, plainform, adcform]))
return ret
def muladd(self, m1, m2, *addends):
rlo = self.new_value()
rhi = self.new_value()
wideform = "BignumMUL%s(%s)" % (
{ 0:"", 1:"ADD", 2:"ADD2" }[len(addends)],
", ".join([rhi, rlo, m1, m2] + list(addends)))
narrowform = " + ".join(["%s = %s * %s" % (rlo, m1, m2)] +
list(addends))
self.stmt(Statement(
rvars=[m1,m2]+list(addends), wvars=[rhi,rlo],
forms=[None, narrowform, wideform, wideform]))
return rhi, rlo
def write_bigval(self, name, val):
for i in range(self.bv_words):
word = val.getword(i)
self.stmt(Statement(
rvars=[word], wvars=[],
forms=["%s->w[%d] = %s" % (name, i, word)]),
needed=True)
def compute_needed(self):
used_vars = set()
self.queue = [stmt for (needed,stmt) in self.stmts if needed]
while len(self.queue) > 0:
stmt = self.queue.pop(0)
deps = []
for var in stmt.rvars:
if var[0] in string.digits:
continue # constant
deps.append(self.generators[var])
used_vars.add(var)
for index in deps:
if not self.stmts[index][0]:
self.stmts[index][0] = True
self.queue.append(self.stmts[index][1])
forms = []
for i, (needed, stmt) in enumerate(self.stmts):
if needed:
formindex = 0
for (j, var) in enumerate(stmt.wvars):
formindex *= 2
if var in used_vars:
formindex += 1
forms.append(stmt.forms[formindex])
# Now we must check whether this form of the statement
# also writes some variables we _don't_ actually need
# (e.g. if you only wanted the top half from a mul, or
# only the carry from an adc, you'd be forced to
# generate the other output too). Easiest way to do
# this is to look for an identical statement form
# later in the array.
maxindex = max(i for i in range(len(stmt.forms))
if stmt.forms[i] == stmt.forms[formindex])
extra_vars = maxindex & ~formindex
bitpos = 0
while extra_vars != 0:
if extra_vars & (1 << bitpos):
extra_vars &= ~(1 << bitpos)
var = stmt.wvars[-1-bitpos]
used_vars.add(var)
# Also, write out a cast-to-void for each
# subsequently unused value, to prevent gcc
# warnings when the output code is compiled.
forms.append("(void)" + var)
bitpos += 1
used_carry = any(v.startswith("carry") for v in used_vars)
used_vars = [v for v in used_vars if v.startswith("v")]
used_vars.sort(key=lambda v: int(v[1:]))
return used_carry, used_vars, forms
def text(self):
used_carry, values, forms = self.compute_needed()
ret = ""
while len(values) > 0:
prefix, sep, suffix = " BignumInt ", ", ", ";"
currline = values.pop(0)
while (len(values) > 0 and
len(prefix+currline+sep+values[0]+suffix) < 79):
currline += sep + values.pop(0)
ret += prefix + currline + suffix + "\n"
if used_carry:
ret += " BignumCarry carry;\n"
if ret != "":
ret += "\n"
for stmtform in forms:
ret += " %s;\n" % stmtform
return ret
def gen_add(target):
# This is an addition _without_ reduction mod p, so that it can be # This is an addition _without_ reduction mod p, so that it can be
# used both during accumulation of the polynomial and for adding # used both during accumulation of the polynomial and for adding
# on the encrypted nonce at the end (which is mod 2^128, not mod # on the encrypted nonce at the end (which is mod 2^128, not mod
@ -111,157 +303,66 @@ def gen_add(bignum_int_bits):
# Because one of the inputs will have come from our # Because one of the inputs will have come from our
# not-completely-reducing multiplication function, we expect up to # not-completely-reducing multiplication function, we expect up to
# 3 extra bits of input. # 3 extra bits of input.
acclo = Variable(out, "acclo")
acclo.clear(0) a = target.bigval_input("a", 133)
b = target.bigval_input("b", 133)
for wordpos in range(inwords): ret = a + b
limit = min(1 << bignum_int_bits, 1 << (130 - wordpos*bignum_int_bits)) target.write_bigval("r", ret)
acclo.add_input_word("a->w[%d]", wordpos, limit) return """\
acclo.add_input_word("b->w[%d]", wordpos, limit) static void bigval_add(bigval *r, const bigval *a, const bigval *b)
acclo.output_word(0, bignum_int_bits, "r->w[%d]", wordpos) {
acclo.shift_down_from(None) %s}
\n""" % target.text()
return out.finalise()
def gen_mul_1305(bignum_int_bits):
out = Output(bignum_int_bits)
inbits = 130
inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits
def gen_mul(target):
# The inputs are not 100% reduced mod p. Specifically, we can get # The inputs are not 100% reduced mod p. Specifically, we can get
# a full 130-bit number from the pow5==0 pass, and then a 130-bit # a full 130-bit number from the pow5==0 pass, and then a 130-bit
# number times 5 from the pow5==1 pass, plus a possible carry. The # number times 5 from the pow5==1 pass, plus a possible carry. The
# total of that can be easily bounded above by 2^130 * 8, so we # total of that can be easily bounded above by 2^130 * 8, so we
# need to assume we're multiplying two 133-bit numbers. # need to assume we're multiplying two 133-bit numbers.
outbits = (inbits + 3) * 2
outwords = (outbits + bignum_int_bits - 1) / bignum_int_bits + 1
tmp = Variable(out, "tmp") a = target.bigval_input("a", 133)
acclo = Variable(out, "acclo") b = target.bigval_input("b", 133)
acchi = Variable(out, "acchi") ab = a * b
acc2lo = Variable(out, "acc2lo") ab0 = ab.extract_bits(0, 130)
ab1 = ab.extract_bits(130, 130)
ab2 = ab.extract_bits(260)
ab1_5 = target.const(5) * ab1
ab2_25 = target.const(25) * ab2
ret = ab0 + ab1_5 + ab2_25
target.write_bigval("r", ret)
return """\
static void bigval_mul_mod_p(bigval *r, const bigval *a, const bigval *b)
{
%s}
\n""" % target.text()
pow5, bits_at_pow5 = 0, inbits def gen_final_reduce(target):
# We take our input number n, and compute k = n + 5*(n >> 130).
acclo.clear(0)
acchi.clear(bignum_int_bits)
bits_needed_in_acc2 = bignum_int_bits
for outwordpos in range(outwords):
for a in range(inwords):
b = outwordpos - a
if 0 <= b < inwords:
tmp.set_to_product("a->w[%d]" % a, "b->w[%d]" % b,
outwordpos * bignum_int_bits)
tmp.unload_into(acchi, acclo)
bits_in_word = bignum_int_bits
bitpos = 0
#print "begin output"
while bits_in_word > 0:
chunk = min(bits_in_word, bits_at_pow5)
if pow5 > 0:
chunk = min(chunk, bits_needed_in_acc2)
if pow5 == 0:
acclo.output_word(bitpos, chunk, "r->w[%d]", outwordpos)
else:
acclo.transfer_to_next_acc(bitpos, chunk, pow5, acc2lo)
bits_needed_in_acc2 -= chunk
if bits_needed_in_acc2 == 0:
assert acc2lo.placeval % bignum_int_bits == 0
other_outwordpos = acc2lo.placeval / bignum_int_bits
acc2lo.add_input_word("r->w[%d]", other_outwordpos)
acc2lo.output_word(bitpos, bignum_int_bits, "r->w[%d]",
other_outwordpos)
acc2lo.shift_down_from(None)
bits_needed_in_acc2 = bignum_int_bits
bits_in_word -= chunk
bits_at_pow5 -= chunk
bitpos += chunk
if bits_at_pow5 == 0:
if pow5 > 0:
assert acc2lo.placeval % bignum_int_bits == 0
other_outwordpos = acc2lo.placeval / bignum_int_bits
acc2lo.add_input_word("r->w[%d]", other_outwordpos)
acc2lo.output_word(0, bignum_int_bits, "r->w[%d]",
other_outwordpos)
pow5 += 1
bits_at_pow5 = inbits
acc2lo.clear(0)
bits_needed_in_acc2 = bignum_int_bits
acclo.shift_down_from(acchi)
while acc2lo.maxval > 0:
other_outwordpos = acc2lo.placeval / bignum_int_bits
bitsleft = inbits - other_outwordpos * bignum_int_bits
limit = 1<<bitsleft if bitsleft < bignum_int_bits else None
acc2lo.add_input_word("r->w[%d]", other_outwordpos, limit=limit)
acc2lo.output_word(0, bignum_int_bits, "r->w[%d]", other_outwordpos)
acc2lo.shift_down_from(None)
return out.finalise()
def gen_final_reduce_1305(bignum_int_bits):
out = Output(bignum_int_bits)
inbits = 130
inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits
# We take our input number n, and compute k = 5 + 5*(n >> 130).
# Then k >> 130 is precisely the multiple of p that needs to be # Then k >> 130 is precisely the multiple of p that needs to be
# subtracted from n to reduce it to strictly less than p. # subtracted from n to reduce it to strictly less than p.
acclo = Variable(out, "acclo") a = target.bigval_input("n", 133)
a1 = a.extract_bits(130, 130)
k = a + target.const(5) * a1
q = k.extract_bits(130)
adjusted = a + target.const(5) * q
ret = adjusted.extract_bits(0, 130)
target.write_bigval("n", ret)
return """\
static void bigval_final_reduce(bigval *n)
{
%s}
\n""" % target.text()
acclo.clear(0) pp_keyword = "#if"
# Hopefully all the bits we're shifting down fit in the same word. for bits in [16, 32, 64]:
assert 130 / bignum_int_bits == (130 + 3 - 1) / bignum_int_bits sys.stdout.write("%s BIGNUM_INT_BITS == %d\n\n" % (pp_keyword, bits))
acclo.add_word("5 * ((n->w[%d] >> %d) + 1)" % pp_keyword = "#elif"
(130 / bignum_int_bits, 130 % bignum_int_bits), sys.stdout.write(gen_add(CodegenTarget(bits)))
limit = 5 * (7 + 1)) sys.stdout.write(gen_mul(CodegenTarget(bits)))
for wordpos in range(inwords): sys.stdout.write(gen_final_reduce(CodegenTarget(bits)))
acclo.add_input_word("n->w[%d]", wordpos) sys.stdout.write("""#else
# Notionally, we could call acclo.output_word here to store #error Add another bit count to contrib/make1305.py and rerun it
# our adjusted value k. But we don't need to, because all we #endif
# actually want is the very top word of it. """)
if wordpos == 130 / bignum_int_bits:
break
acclo.shift_down_from(None)
# Now we can find the right multiple of p to subtract. We actually
# subtract it by adding 5 times it, and then finally discarding
# the top bits of the output.
# Hopefully all the bits we're shifting down fit in the same word.
assert 130 / bignum_int_bits == (130 + 3 - 1) / bignum_int_bits
acclo.set_word("5 * (acclo >> %d)" % (130 % bignum_int_bits),
limit = 5 * (7 + 1))
acclo.placeval = 0
for wordpos in range(inwords):
acclo.add_input_word("n->w[%d]", wordpos)
acclo.output_word(0, bignum_int_bits, "n->w[%d]", wordpos)
acclo.shift_down_from(None)
out.stmt("n->w[%d] &= (1 << %d) - 1" %
(130 / bignum_int_bits, 130 % bignum_int_bits))
# Here we don't call out.finalise(), because that will complain
# that there are bits of output we never dealt with. This is true,
# but all the bits in question are above 2^130, so they're bits
# we're discarding anyway.
return out.text # not out.finalise()
ops = { "mul" : gen_mul_1305,
"add" : gen_add,
"final_reduce" : gen_final_reduce_1305 }
args = sys.argv[1:]
if len(args) != 2 or args[0] not in ops:
sys.stderr.write("usage: make1305.py (%s) <bits>\n" % (" | ".join(sorted(ops))))
sys.exit(1)
sys.stdout.write(" /* ./contrib/make1305.py %s %s */\n" % tuple(args))
s = ops[args[0]](int(args[1]))
sys.stdout.write(s)

281
sshbn.c
View File

@ -87,20 +87,17 @@ Bignum bn_power_2(int n)
/* /*
* Internal addition. Sets c = a + b, where 'a', 'b' and 'c' are all * Internal addition. Sets c = a + b, where 'a', 'b' and 'c' are all
* big-endian arrays of 'len' BignumInts. Returns a BignumInt carried * big-endian arrays of 'len' BignumInts. Returns the carry off the
* off the top. * top.
*/ */
static BignumInt internal_add(const BignumInt *a, const BignumInt *b, static BignumCarry internal_add(const BignumInt *a, const BignumInt *b,
BignumInt *c, int len) BignumInt *c, int len)
{ {
int i; int i;
BignumDblInt carry = 0; BignumCarry carry = 0;
for (i = len-1; i >= 0; i--) { for (i = len-1; i >= 0; i--)
carry += (BignumDblInt)a[i] + b[i]; BignumADC(c[i], carry, a[i], b[i], carry);
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
}
return (BignumInt)carry; return (BignumInt)carry;
} }
@ -114,13 +111,10 @@ static void internal_sub(const BignumInt *a, const BignumInt *b,
BignumInt *c, int len) BignumInt *c, int len)
{ {
int i; int i;
BignumDblInt carry = 1; BignumCarry carry = 1;
for (i = len-1; i >= 0; i--) { for (i = len-1; i >= 0; i--)
carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK); BignumADC(c[i], carry, a[i], ~b[i], carry);
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
}
} }
/* /*
@ -184,7 +178,7 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
int midlen = botlen + 1; int midlen = botlen + 1;
BignumDblInt carry; BignumCarry carry;
#ifdef KARA_DEBUG #ifdef KARA_DEBUG
int i; int i;
#endif #endif
@ -313,9 +307,7 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
i = 2*len - botlen - 2*midlen - 1; i = 2*len - botlen - 2*midlen - 1;
while (carry) { while (carry) {
assert(i >= 0); assert(i >= 0);
carry += c[i]; BignumADC(c[i], carry, c[i], 0, carry);
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
i--; i--;
} }
#ifdef KARA_DEBUG #ifdef KARA_DEBUG
@ -329,7 +321,6 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
} else { } else {
int i; int i;
BignumInt carry; BignumInt carry;
BignumDblInt t;
const BignumInt *ap, *bp; const BignumInt *ap, *bp;
BignumInt *cp, *cps; BignumInt *cp, *cps;
@ -342,11 +333,8 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) { for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
carry = 0; carry = 0;
for (cp = cps, bp = b + len; cp--, bp-- > b ;) { for (cp = cps, bp = b + len; cp--, bp-- > b ;)
t = (MUL_WORD(*ap, *bp) + carry) + *cp; BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
}
*cp = carry; *cp = carry;
} }
} }
@ -431,7 +419,6 @@ static void internal_mul_low(const BignumInt *a, const BignumInt *b,
} else { } else {
int i; int i;
BignumInt carry; BignumInt carry;
BignumDblInt t;
const BignumInt *ap, *bp; const BignumInt *ap, *bp;
BignumInt *cp, *cps; BignumInt *cp, *cps;
@ -444,11 +431,8 @@ static void internal_mul_low(const BignumInt *a, const BignumInt *b,
for (cps = c + len, ap = a + len; ap-- > a; cps--) { for (cps = c + len, ap = a + len; ap-- > a; cps--) {
carry = 0; carry = 0;
for (cp = cps, bp = b + len; bp--, cp-- > c ;) { for (cp = cps, bp = b + len; bp--, cp-- > c ;)
t = (MUL_WORD(*ap, *bp) + carry) + *cp; BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
}
} }
} }
} }
@ -519,15 +503,23 @@ static void internal_add_shifted(BignumInt *number,
{ {
int word = 1 + (shift / BIGNUM_INT_BITS); int word = 1 + (shift / BIGNUM_INT_BITS);
int bshift = shift % BIGNUM_INT_BITS; int bshift = shift % BIGNUM_INT_BITS;
BignumDblInt addend; BignumInt addendh, addendl;
BignumCarry carry;
addend = (BignumDblInt)n << bshift; addendl = n << bshift;
addendh = (bshift == 0 ? 0 : n >> (BIGNUM_INT_BITS - bshift));
while (addend) { assert(word <= number[0]);
BignumADC(number[word], carry, number[word], addendl, 0);
word++;
if (!addendh && !carry)
return;
assert(word <= number[0]);
BignumADC(number[word], carry, number[word], addendh, carry);
word++;
while (carry) {
assert(word <= number[0]); assert(word <= number[0]);
addend += number[word]; BignumADC(number[word], carry, number[word], 0, carry);
number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
addend >>= BIGNUM_INT_BITS;
word++; word++;
} }
} }
@ -555,8 +547,7 @@ static int bn_clz(BignumInt x)
static BignumInt reciprocal_word(BignumInt d) static BignumInt reciprocal_word(BignumInt d)
{ {
BignumInt dshort, recip; BignumInt dshort, recip, prodh, prodl;
BignumDblInt product;
int corrections; int corrections;
/* /*
@ -600,15 +591,16 @@ static BignumInt reciprocal_word(BignumInt d)
* iteration, and the initial division above already gave us half * iteration, and the initial division above already gave us half
* the output word, so it's only worth doing one iteration. * the output word, so it's only worth doing one iteration.
*/ */
product = MUL_WORD(recip, d); BignumMULADD(prodh, prodl, recip, d, recip);
product += recip; prodl = ~prodl;
product = -product; /* the 2K shifts just off the top */ prodh = ~prodh;
product &= (((BignumDblInt)BIGNUM_INT_MASK << BIGNUM_INT_BITS) + {
BIGNUM_INT_MASK); BignumCarry c;
product >>= BIGNUM_INT_BITS; BignumADC(prodl, c, prodl, 1, 0);
product = MUL_WORD(product, recip); prodh += c;
product >>= (BIGNUM_INT_BITS-1); }
recip = (BignumInt)product; BignumMUL(prodh, prodl, prodh, recip);
recip = (prodh << 1) | (prodl >> (BIGNUM_INT_BITS-1));
/* /*
* Now make sure we have the best possible reciprocal estimate, * Now make sure we have the best possible reciprocal estimate,
@ -616,18 +608,24 @@ static BignumInt reciprocal_word(BignumInt d)
* way - not enough to bother with any better-thought-out kind of * way - not enough to bother with any better-thought-out kind of
* correction loop. * correction loop.
*/ */
product = MUL_WORD(recip, d); BignumMULADD(prodh, prodl, recip, d, recip);
product += recip;
corrections = 0; corrections = 0;
if (product >= ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1))) { if (prodh >= BIGNUM_TOP_BIT) {
do { do {
product -= d; BignumCarry c = 1;
BignumADC(prodl, c, prodl, ~d, c); prodh += BIGNUM_INT_MASK + c;
recip--; recip--;
corrections++; corrections++;
} while (product >= ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1))); } while (prodh >= ((BignumInt)1 << (BIGNUM_INT_BITS-1)));
} else { } else {
while (product < ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1)) - d) { while (1) {
product += d; BignumInt newprodh, newprodl;
BignumCarry c = 0;
BignumADC(newprodl, c, prodl, d, c); newprodh = prodh + c;
if (newprodh >= BIGNUM_TOP_BIT)
break;
prodh = newprodh;
prodl = newprodl;
recip++; recip++;
corrections++; corrections++;
} }
@ -679,7 +677,7 @@ static void internal_mod(BignumInt *a, int alen,
* here. * here.
*/ */
for (i = 0; i <= alen - mlen ;) { for (i = 0; i <= alen - mlen ;) {
BignumDblInt product, subtmp, t; BignumInt product;
BignumInt aword, q; BignumInt aword, q;
int shift, full_bitoffset, bitoffset, wordoffset; int shift, full_bitoffset, bitoffset, wordoffset;
@ -707,8 +705,11 @@ static void internal_mod(BignumInt *a, int alen,
if (shift > 0 && i+1 < alen) if (shift > 0 && i+1 < alen)
aword |= a[i+1] >> (BIGNUM_INT_BITS - shift); aword |= a[i+1] >> (BIGNUM_INT_BITS - shift);
t = MUL_WORD(recip, aword); {
q = (BignumInt)(t >> BIGNUM_INT_BITS); BignumInt unused;
BignumMUL(q, unused, recip, aword);
(void)unused;
}
#ifdef DIVISION_DEBUG #ifdef DIVISION_DEBUG
printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n", printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n",
@ -784,27 +785,22 @@ static void internal_mod(BignumInt *a, int alen,
wordoffset = alen - mlen - wordoffset; wordoffset = alen - mlen - wordoffset;
if (bitoffset == 0) { if (bitoffset == 0) {
BignumInt c = 1; BignumCarry c = 1;
BignumInt prev_hi_word = 0; BignumInt prev_hi_word = 0;
for (k = mlen - 1; wordoffset+k >= i; k--) { for (k = mlen - 1; wordoffset+k >= i; k--) {
BignumInt mword = k<0 ? 0 : m[k]; BignumInt mword = k<0 ? 0 : m[k];
product = MUL_WORD(q, mword); BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
product += prev_hi_word;
prev_hi_word = product >> BIGNUM_INT_BITS;
#ifdef DIVISION_DEBUG #ifdef DIVISION_DEBUG
printf(" aligned sub: product word for m[%d] = %#0*llx\n", printf(" aligned sub: product word for m[%d] = %#0*llx\n",
k, BIGNUM_INT_BITS/4, k, BIGNUM_INT_BITS/4,
(unsigned long long)(BignumInt)product); (unsigned long long)product);
#endif #endif
#ifdef DIVISION_DEBUG #ifdef DIVISION_DEBUG
printf(" aligned sub: subtrahend for a[%d] = %#0*llx\n", printf(" aligned sub: subtrahend for a[%d] = %#0*llx\n",
wordoffset+k, BIGNUM_INT_BITS/4, wordoffset+k, BIGNUM_INT_BITS/4,
(unsigned long long)(BignumInt)product); (unsigned long long)product);
#endif #endif
subtmp = (BignumDblInt)a[wordoffset+k] + BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c);
((BignumInt)product ^ BIGNUM_INT_MASK) + c;
a[wordoffset+k] = (BignumInt)subtmp;
c = subtmp >> BIGNUM_INT_BITS;
} }
} else { } else {
BignumInt add_word = 0; BignumInt add_word = 0;
@ -812,28 +808,23 @@ static void internal_mod(BignumInt *a, int alen,
BignumInt prev_hi_word = 0; BignumInt prev_hi_word = 0;
for (k = mlen - 1; wordoffset+k >= i; k--) { for (k = mlen - 1; wordoffset+k >= i; k--) {
BignumInt mword = k<0 ? 0 : m[k]; BignumInt mword = k<0 ? 0 : m[k];
product = MUL_WORD(q, mword); BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
product += prev_hi_word;
prev_hi_word = product >> BIGNUM_INT_BITS;
#ifdef DIVISION_DEBUG #ifdef DIVISION_DEBUG
printf(" unaligned sub: product word for m[%d] = %#0*llx\n", printf(" unaligned sub: product word for m[%d] = %#0*llx\n",
k, BIGNUM_INT_BITS/4, k, BIGNUM_INT_BITS/4,
(unsigned long long)(BignumInt)product); (unsigned long long)product);
#endif #endif
add_word |= (BignumInt)product << bitoffset; add_word |= product << bitoffset;
#ifdef DIVISION_DEBUG #ifdef DIVISION_DEBUG
printf(" unaligned sub: subtrahend for a[%d] = %#0*llx\n", printf(" unaligned sub: subtrahend for a[%d] = %#0*llx\n",
wordoffset+k, wordoffset+k,
BIGNUM_INT_BITS/4, (unsigned long long)add_word); BIGNUM_INT_BITS/4, (unsigned long long)add_word);
#endif #endif
subtmp = (BignumDblInt)a[wordoffset+k] + BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c);
(add_word ^ BIGNUM_INT_MASK) + c;
a[wordoffset+k] = (BignumInt)subtmp;
c = subtmp >> BIGNUM_INT_BITS;
add_word = (BignumInt)product >> (BIGNUM_INT_BITS - bitoffset); add_word = product >> (BIGNUM_INT_BITS - bitoffset);
} }
} }
@ -917,14 +908,11 @@ static void internal_mod(BignumInt *a, int alen,
* subtract m, and increment the quotient. * subtract m, and increment the quotient.
*/ */
{ {
BignumInt c = 1; BignumCarry c = 1;
for (i = alen - 1; i >= 0; i--) { for (i = alen - 1; i >= 0; i--) {
int mindex = mlen-alen+i; int mindex = mlen-alen+i;
BignumInt mword = mindex < 0 ? 0 : m[mindex]; BignumInt mword = mindex < 0 ? 0 : m[mindex];
BignumDblInt subtmp = (BignumDblInt)a[i] + BignumADC(a[i], c, a[i], ~mword, c);
((BignumInt)mword ^ BIGNUM_INT_MASK) + c;
a[i] = (BignumInt)subtmp;
c = subtmp >> BIGNUM_INT_BITS;
} }
} }
if (quot) if (quot)
@ -1767,12 +1755,11 @@ Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
/* now add in the addend, if any */ /* now add in the addend, if any */
if (addend) { if (addend) {
BignumDblInt carry = 0; BignumCarry carry = 0;
for (i = 1; i <= rlen; i++) { for (i = 1; i <= rlen; i++) {
carry += (i <= (int)ret[0] ? ret[i] : 0); BignumInt retword = (i <= (int)ret[0] ? ret[i] : 0);
carry += (i <= (int)addend[0] ? addend[i] : 0); BignumInt addword = (i <= (int)addend[0] ? addend[i] : 0);
ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; BignumADC(ret[i], carry, retword, addword, carry);
carry >>= BIGNUM_INT_BITS;
if (ret[i] != 0 && i > maxspot) if (ret[i] != 0 && i > maxspot)
maxspot = i; maxspot = i;
} }
@ -1801,17 +1788,16 @@ Bignum bigadd(Bignum a, Bignum b)
int rlen = (alen > blen ? alen : blen) + 1; int rlen = (alen > blen ? alen : blen) + 1;
int i, maxspot; int i, maxspot;
Bignum ret; Bignum ret;
BignumDblInt carry; BignumCarry carry;
ret = newbn(rlen); ret = newbn(rlen);
carry = 0; carry = 0;
maxspot = 0; maxspot = 0;
for (i = 1; i <= rlen; i++) { for (i = 1; i <= rlen; i++) {
carry += (i <= (int)a[0] ? a[i] : 0); BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
carry += (i <= (int)b[0] ? b[i] : 0); BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; BignumADC(ret[i], carry, aword, bword, carry);
carry >>= BIGNUM_INT_BITS;
if (ret[i] != 0 && i > maxspot) if (ret[i] != 0 && i > maxspot)
maxspot = i; maxspot = i;
} }
@ -1831,17 +1817,16 @@ Bignum bigsub(Bignum a, Bignum b)
int rlen = (alen > blen ? alen : blen); int rlen = (alen > blen ? alen : blen);
int i, maxspot; int i, maxspot;
Bignum ret; Bignum ret;
BignumDblInt carry; BignumCarry carry;
ret = newbn(rlen); ret = newbn(rlen);
carry = 1; carry = 1;
maxspot = 0; maxspot = 0;
for (i = 1; i <= rlen; i++) { for (i = 1; i <= rlen; i++) {
carry += (i <= (int)a[0] ? a[i] : 0); BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK); BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; BignumADC(ret[i], carry, aword, ~bword, carry);
carry >>= BIGNUM_INT_BITS;
if (ret[i] != 0 && i > maxspot) if (ret[i] != 0 && i > maxspot)
maxspot = i; maxspot = i;
} }
@ -1881,40 +1866,52 @@ Bignum bignum_bitmask(Bignum n)
} }
/* /*
* Convert a (max 32-bit) long into a bignum. * Convert an unsigned long into a bignum.
*/ */
Bignum bignum_from_long(unsigned long nn) Bignum bignum_from_long(unsigned long n)
{ {
const int maxwords =
(sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
Bignum ret; Bignum ret;
BignumDblInt n = nn; int i;
ret = newbn(maxwords);
ret[0] = 0;
for (i = 0; i < maxwords; i++) {
ret[i+1] = n >> (i * BIGNUM_INT_BITS);
if (ret[i+1] != 0)
ret[0] = i+1;
}
ret = newbn(3);
ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
ret[3] = 0;
ret[0] = (ret[2] ? 2 : 1);
return ret; return ret;
} }
/* /*
* Add a long to a bignum. * Add a long to a bignum.
*/ */
Bignum bignum_add_long(Bignum number, unsigned long addendx) Bignum bignum_add_long(Bignum number, unsigned long n)
{ {
Bignum ret = newbn(number[0] + 1); const int maxwords =
int i, maxspot = 0; (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
BignumDblInt carry = 0, addend = addendx; Bignum ret;
int words, i;
BignumCarry carry;
for (i = 1; i <= (int)ret[0]; i++) { words = number[0];
carry += addend & BIGNUM_INT_MASK; if (words < maxwords)
carry += (i <= (int)number[0] ? number[i] : 0); words = maxwords;
addend >>= BIGNUM_INT_BITS; words++;
ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; ret = newbn(words);
carry >>= BIGNUM_INT_BITS;
if (ret[i] != 0) carry = 0;
maxspot = i; ret[0] = 0;
for (i = 0; i < words; i++) {
BignumInt nword = (i < maxwords ? n >> (i * BIGNUM_INT_BITS) : 0);
BignumInt numword = (i < number[0] ? number[i+1] : 0);
BignumADC(ret[i+1], carry, numword, nword, carry);
if (ret[i+1] != 0)
ret[0] = i+1;
} }
ret[0] = maxspot;
return ret; return ret;
} }
@ -1923,13 +1920,17 @@ Bignum bignum_add_long(Bignum number, unsigned long addendx)
*/ */
unsigned short bignum_mod_short(Bignum number, unsigned short modulus) unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
{ {
BignumDblInt mod, r; unsigned long mod = modulus, r = 0;
/* Precompute (BIGNUM_INT_MASK+1) % mod */
unsigned long base_r = (BIGNUM_INT_MASK - modulus + 1) % mod;
int i; int i;
r = 0; for (i = number[0]; i > 0; i--) {
mod = modulus; /*
for (i = number[0]; i > 0; i--) * Conceptually, ((r << BIGNUM_INT_BITS) + number[i]) % mod
r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod; */
r = ((r * base_r) + (number[i] % mod)) % mod;
}
return (unsigned short) r; return (unsigned short) r;
} }
@ -2087,7 +2088,7 @@ char *bignum_decimal(Bignum x)
{ {
int ndigits, ndigit; int ndigits, ndigit;
int i, iszero; int i, iszero;
BignumDblInt carry; BignumInt carry;
char *ret; char *ret;
BignumInt *workspace; BignumInt *workspace;
@ -2134,11 +2135,33 @@ char *bignum_decimal(Bignum x)
iszero = 1; iszero = 1;
carry = 0; carry = 0;
for (i = 0; i < (int)x[0]; i++) { for (i = 0; i < (int)x[0]; i++) {
carry = (carry << BIGNUM_INT_BITS) + workspace[i]; /*
workspace[i] = (BignumInt) (carry / 10); * Conceptually, we want to compute
*
* (carry << BIGNUM_INT_BITS) + workspace[i]
* -----------------------------------------
* 10
*
* but we don't have an integer type longer than BignumInt
* to work with. So we have to do it in pieces.
*/
BignumInt q, r;
q = workspace[i] / 10;
r = workspace[i] % 10;
/* I want (BIGNUM_INT_MASK+1)/10 but can't say so directly! */
q += carry * ((BIGNUM_INT_MASK-9) / 10 + 1);
r += carry * ((BIGNUM_INT_MASK-9) % 10);
q += r / 10;
r %= 10;
workspace[i] = q;
carry = r;
if (workspace[i]) if (workspace[i])
iszero = 0; iszero = 0;
carry %= 10;
} }
ret[--ndigit] = (char) (carry + '0'); ret[--ndigit] = (char) (carry + '0');
} while (!iszero); } while (!iszero);

205
sshbn.h
View File

@ -3,58 +3,171 @@
* multiply macros used throughout the bignum code to treat numbers as * multiply macros used throughout the bignum code to treat numbers as
* arrays of the most conveniently sized word for the target machine. * arrays of the most conveniently sized word for the target machine.
* Exported so that other code (e.g. poly1305) can use it too. * Exported so that other code (e.g. poly1305) can use it too.
*
* This file must export, in whatever ifdef branch it ends up in:
*
* - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
* unsigned integer type which will be used as the base word size
* for all bignum operations. BignumCarry is an unsigned integer
* type used to hold the carry flag taken as input and output by
* the BignumADC macro (see below).
*
* - four constant macros: BIGNUM_INT_BITS, BIGNUM_INT_BYTES,
* BIGNUM_TOP_BIT, BIGNUM_INT_MASK. These should be more or less
* self-explanatory, but just in case, they give the number of bits
* in BignumInt, the number of bytes that works out to, the
* BignumInt value consisting of only the top bit, and the
* BignumInt value with all bits set.
*
* - four statement macros: BignumADC, BignumMUL, BignumMULADD,
* BignumMULADD2. These do various kinds of multi-word arithmetic,
* and all produce two output values.
* * BignumADC(ret,retc,a,b,c) takes input BignumInt values a,b
* and a BignumCarry c, and outputs a BignumInt ret = a+b+c and
* a BignumCarry retc which is the carry off the top of that
* addition.
* * BignumMUL(rh,rl,a,b) returns the two halves of the
* double-width product a*b.
* * BignumMULADD(rh,rl,a,b,addend) returns the two halves of the
* double-width value a*b + addend.
* * BignumMULADD2(rh,rl,a,b,addend1,addend2) returns the two
* halves of the double-width value a*b + addend1 + addend2.
*
* Every branch of the main ifdef below defines the type BignumInt and
* the value BIGNUM_INT_BITS. The other three constant macros are
* filled in by common code further down.
*
* Most branches also define a macro DEFINE_BIGNUMDBLINT containing a
* typedef statement which declares a type _twice_ the length of a
* BignumInt. This causes the common code further down to produce a
* default implementation of the four statement macros in terms of
* that double-width type, and also to define BignumCarry to be
* BignumInt.
*
* However, if a particular compile target does not have a type twice
* the length of the BignumInt you want to use but it does provide
* some alternative means of doing add-with-carry and double-word
* multiply, then the ifdef branch in question can just define
* BignumCarry and the four statement macros itself, and that's fine
* too.
*/ */
#if defined __SIZEOF_INT128__ #if defined __SIZEOF_INT128__
/* gcc and clang both provide a __uint128_t type on 64-bit targets
* (and, when they do, indicate its presence by the above macro), /*
* using the same 'two machine registers' kind of code generation that * 64-bit BignumInt using gcc/clang style 128-bit BignumDblInt.
* 32-bit targets use for 64-bit ints. If we have one of these, we can *
* use a 64-bit BignumInt and a 128-bit BignumDblInt. */ * gcc and clang both provide a __uint128_t type on 64-bit targets
typedef unsigned long long BignumInt; * (and, when they do, indicate its presence by the above macro),
typedef __uint128_t BignumDblInt; * using the same 'two machine registers' kind of code generation
#define BIGNUM_INT_MASK 0xFFFFFFFFFFFFFFFFULL * that 32-bit targets use for 64-bit ints.
#define BIGNUM_TOP_BIT 0x8000000000000000ULL */
#define BIGNUM_INT_BITS 64
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) typedef unsigned long long BignumInt;
#elif defined __GNUC__ && defined __i386__ #define BIGNUM_INT_BITS 64
typedef unsigned long BignumInt; #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFUL #elif defined __GNUC__ || defined _LLP64 || __STDC__ >= 199901L
#define BIGNUM_TOP_BIT 0x80000000UL
#define BIGNUM_INT_BITS 32 /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
typedef unsigned int BignumInt;
#define BIGNUM_INT_BITS 32
#define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
typedef unsigned __int32 BignumInt;
typedef unsigned __int64 BignumDblInt; /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
#define BIGNUM_INT_MASK 0xFFFFFFFFUL
#define BIGNUM_TOP_BIT 0x80000000UL typedef unsigned int BignumInt;
#define BIGNUM_INT_BITS 32 #define BIGNUM_INT_BITS 32
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
#elif defined _LP64 #elif defined _LP64
/* 64-bit architectures can do 32x32->64 chunks at a time */
typedef unsigned int BignumInt; /*
typedef unsigned long BignumDblInt; * 32-bit BignumInt, using unsigned long itself as BignumDblInt.
#define BIGNUM_INT_MASK 0xFFFFFFFFU *
#define BIGNUM_TOP_BIT 0x80000000U * Only for platforms where long is 64 bits, of course.
#define BIGNUM_INT_BITS 32 */
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
#elif defined _LLP64 typedef unsigned int BignumInt;
/* 64-bit architectures in which unsigned long is 32 bits, not 64 */ #define BIGNUM_INT_BITS 32
typedef unsigned long BignumInt; #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK 0xFFFFFFFFUL
#define BIGNUM_TOP_BIT 0x80000000UL
#define BIGNUM_INT_BITS 32
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
#else #else
/* Fallback for all other cases */
typedef unsigned short BignumInt; /*
typedef unsigned long BignumDblInt; * 16-bit BignumInt, using unsigned long as BignumDblInt.
#define BIGNUM_INT_MASK 0xFFFFU *
#define BIGNUM_TOP_BIT 0x8000U * This is the final fallback for real emergencies: C89 guarantees
#define BIGNUM_INT_BITS 16 * unsigned short/long to be at least the required sizes, so this
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) * should work on any C implementation at all. But it'll be
* noticeably slow, so if you find yourself in this case you
* probably want to move heaven and earth to find an alternative!
*/
typedef unsigned short BignumInt;
#define BIGNUM_INT_BITS 16
#define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
#endif #endif
/*
* Common code across all branches of that ifdef: define the three
* easy constant macros in terms of BIGNUM_INT_BITS.
*/
#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8) #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
#define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
#define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
/*
* Common code across _most_ branches of the ifdef: define a set of
* statement macros in terms of the BignumDblInt type provided. In
* this case, we also define BignumCarry to be the same thing as
* BignumInt, for simplicity.
*/
#ifdef DEFINE_BIGNUMDBLINT
typedef BignumInt BignumCarry;
/*
 * BignumADC(ret, retc, a, b, c): add with carry.
 *
 * Default implementation for targets that supplied DEFINE_BIGNUMDBLINT.
 * The double-width type BignumDblInt is typedef'd inside the macro's own
 * block, so the name is not visible to code that uses the macro.
 *
 * a and b are each cast to BignumInt and summed with the carry-in c in a
 * BignumDblInt temporary. The low BIGNUM_INT_BITS bits of the sum are
 * stored in ret, and whatever lies above them is stored in retc.
 *
 * Both outputs are assigned only after all three inputs have been folded
 * into the temporary, so ret may name the same lvalue as a or b, and retc
 * the same lvalue as c.
 */
#define BignumADC(ret, retc, a, b, c) do \
{ \
DEFINE_BIGNUMDBLINT; \
BignumDblInt ADC_temp = (BignumInt)(a); \
ADC_temp += (BignumInt)(b); \
ADC_temp += (c); \
(ret) = (BignumInt)ADC_temp; \
(retc) = (BignumCarry)(ADC_temp >> BIGNUM_INT_BITS); \
} while (0)
/*
 * BignumMUL(rh, rl, a, b): double-width multiply.
 *
 * Computes a*b in a BignumDblInt temporary (typedef'd locally through
 * DEFINE_BIGNUMDBLINT). The bits above BIGNUM_INT_BITS are stored in rh
 * and the low BIGNUM_INT_BITS bits in rl.
 *
 * a and b are both read before either output is written, so rh or rl may
 * alias an input.
 */
#define BignumMUL(rh, rl, a, b) do \
{ \
DEFINE_BIGNUMDBLINT; \
BignumDblInt MUL_temp = (BignumInt)(a); \
MUL_temp *= (BignumInt)(b); \
(rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
(rl) = (BignumInt)(MUL_temp); \
} while (0)
/*
 * BignumMULADD(rh, rl, a, b, addend): double-width multiply-accumulate.
 *
 * Computes a*b + addend in a BignumDblInt temporary (typedef'd locally
 * through DEFINE_BIGNUMDBLINT), with addend cast to a single BignumInt.
 * The high word of the result is stored in rh and the low word in rl.
 *
 * The result cannot overflow the double-width type: with W = the largest
 * BignumInt value, W*W + W is below (W+1)*(W+1).
 *
 * All inputs are consumed before rh and rl are assigned, so rh may be the
 * same lvalue as addend (useful for threading a running high word
 * through a loop).
 */
#define BignumMULADD(rh, rl, a, b, addend) do \
{ \
DEFINE_BIGNUMDBLINT; \
BignumDblInt MUL_temp = (BignumInt)(a); \
MUL_temp *= (BignumInt)(b); \
MUL_temp += (BignumInt)(addend); \
(rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
(rl) = (BignumInt)(MUL_temp); \
} while (0)
/*
 * BignumMULADD2(rh, rl, a, b, addend1, addend2): double-width multiply
 * with two single-word addends.
 *
 * Computes a*b + addend1 + addend2 in a BignumDblInt temporary (typedef'd
 * locally through DEFINE_BIGNUMDBLINT), with each addend cast to
 * BignumInt. The high word of the result is stored in rh and the low
 * word in rl.
 *
 * Two addends is the most that is guaranteed to fit: with W = the largest
 * BignumInt value, W*W + 2*W equals (W+1)*(W+1) - 1, which is exactly the
 * largest double-width value.
 *
 * All inputs are consumed before rh and rl are assigned, so the outputs
 * may alias any of the inputs.
 */
#define BignumMULADD2(rh, rl, a, b, addend1, addend2) do \
{ \
DEFINE_BIGNUMDBLINT; \
BignumDblInt MUL_temp = (BignumInt)(a); \
MUL_temp *= (BignumInt)(b); \
MUL_temp += (BignumInt)(addend1); \
MUL_temp += (BignumInt)(addend2); \
(rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
(rl) = (BignumInt)(MUL_temp); \
} while (0)
#endif /* DEFINE_BIGNUMDBLINT */

1305
sshccp.c

File diff suppressed because it is too large Load Diff