mirror of
https://git.tartarus.org/simon/putty.git
synced 2025-01-10 01:48:00 +00:00
Rejig the bottom-level loops in internal_mul_* to use pointers instead
of array indices. You'd hope that compilers could automatically turn one representation into the other if it were faster to do so, but apparently not: even with gcc -O3, this source transformation yields a performance gain of over 15%. [originally from svn r9105]
This commit is contained in:
parent
9d4005e5c1
commit
7957ca1153
83
sshbn.c
83
sshbn.c
@ -221,10 +221,8 @@ static int mul_compute_scratch(int len)
|
|||||||
static void internal_mul(const BignumInt *a, const BignumInt *b,
|
static void internal_mul(const BignumInt *a, const BignumInt *b,
|
||||||
BignumInt *c, int len, BignumInt *scratch)
|
BignumInt *c, int len, BignumInt *scratch)
|
||||||
{
|
{
|
||||||
int i, j;
|
|
||||||
BignumDblInt t;
|
|
||||||
|
|
||||||
if (len > KARATSUBA_THRESHOLD) {
|
if (len > KARATSUBA_THRESHOLD) {
|
||||||
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Karatsuba divide-and-conquer algorithm. Cut each input in
|
* Karatsuba divide-and-conquer algorithm. Cut each input in
|
||||||
@ -311,9 +309,9 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
|
|||||||
* copied over. */
|
* copied over. */
|
||||||
scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
|
scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
|
||||||
|
|
||||||
for (j = 0; j < toplen; j++) {
|
for (i = 0; i < toplen; i++) {
|
||||||
scratch[midlen - toplen + j] = a[j]; /* a_1 */
|
scratch[midlen - toplen + i] = a[i]; /* a_1 */
|
||||||
scratch[2*midlen - toplen + j] = b[j]; /* b_1 */
|
scratch[2*midlen - toplen + i] = b[i]; /* b_1 */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* compute a_1 + a_0 */
|
/* compute a_1 + a_0 */
|
||||||
@ -355,8 +353,8 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
|
|||||||
* product to obtain the middle one.
|
* product to obtain the middle one.
|
||||||
*/
|
*/
|
||||||
scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
|
scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
|
||||||
for (j = 0; j < 2*toplen; j++)
|
for (i = 0; i < 2*toplen; i++)
|
||||||
scratch[2*midlen - 2*toplen + j] = c[j];
|
scratch[2*midlen - 2*toplen + i] = c[i];
|
||||||
scratch[1] = internal_add(scratch+2, c + 2*toplen,
|
scratch[1] = internal_add(scratch+2, c + 2*toplen,
|
||||||
scratch+2, 2*botlen);
|
scratch+2, 2*botlen);
|
||||||
#ifdef KARA_DEBUG
|
#ifdef KARA_DEBUG
|
||||||
@ -386,13 +384,13 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
|
|||||||
carry = internal_add(c + 2*len - botlen - 2*midlen,
|
carry = internal_add(c + 2*len - botlen - 2*midlen,
|
||||||
scratch + 2*midlen,
|
scratch + 2*midlen,
|
||||||
c + 2*len - botlen - 2*midlen, 2*midlen);
|
c + 2*len - botlen - 2*midlen, 2*midlen);
|
||||||
j = 2*len - botlen - 2*midlen - 1;
|
i = 2*len - botlen - 2*midlen - 1;
|
||||||
while (carry) {
|
while (carry) {
|
||||||
assert(j >= 0);
|
assert(i >= 0);
|
||||||
carry += c[j];
|
carry += c[i];
|
||||||
c[j] = (BignumInt)carry;
|
c[i] = (BignumInt)carry;
|
||||||
carry >>= BIGNUM_INT_BITS;
|
carry >>= BIGNUM_INT_BITS;
|
||||||
j--;
|
i--;
|
||||||
}
|
}
|
||||||
#ifdef KARA_DEBUG
|
#ifdef KARA_DEBUG
|
||||||
printf("ab = 0x");
|
printf("ab = 0x");
|
||||||
@ -403,23 +401,27 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
int i;
|
||||||
|
BignumInt carry;
|
||||||
|
BignumDblInt t;
|
||||||
|
const BignumInt *ap, *bp;
|
||||||
|
BignumInt *cp, *cps;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Multiply in the ordinary O(N^2) way.
|
* Multiply in the ordinary O(N^2) way.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (j = 0; j < 2 * len; j++)
|
for (i = 0; i < 2 * len; i++)
|
||||||
c[j] = 0;
|
c[i] = 0;
|
||||||
|
|
||||||
for (i = len - 1; i >= 0; i--) {
|
for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
|
||||||
t = 0;
|
carry = 0;
|
||||||
for (j = len - 1; j >= 0; j--) {
|
for (cp = cps, bp = b + len; cp--, bp-- > b ;) {
|
||||||
t += MUL_WORD(a[i], (BignumDblInt) b[j]);
|
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
|
||||||
t += (BignumDblInt) c[i + j + 1];
|
*cp = (BignumInt) t;
|
||||||
c[i + j + 1] = (BignumInt) t;
|
carry = t >> BIGNUM_INT_BITS;
|
||||||
t = t >> BIGNUM_INT_BITS;
|
|
||||||
}
|
}
|
||||||
c[i] = (BignumInt) t;
|
*cp = carry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -432,10 +434,8 @@ static void internal_mul(const BignumInt *a, const BignumInt *b,
|
|||||||
static void internal_mul_low(const BignumInt *a, const BignumInt *b,
|
static void internal_mul_low(const BignumInt *a, const BignumInt *b,
|
||||||
BignumInt *c, int len, BignumInt *scratch)
|
BignumInt *c, int len, BignumInt *scratch)
|
||||||
{
|
{
|
||||||
int i, j;
|
|
||||||
BignumDblInt t;
|
|
||||||
|
|
||||||
if (len > KARATSUBA_THRESHOLD) {
|
if (len > KARATSUBA_THRESHOLD) {
|
||||||
|
int i;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Karatsuba-aware version of internal_mul_low. As before, we
|
* Karatsuba-aware version of internal_mul_low. As before, we
|
||||||
@ -492,8 +492,8 @@ static void internal_mul_low(const BignumInt *a, const BignumInt *b,
|
|||||||
scratch + 2*len);
|
scratch + 2*len);
|
||||||
|
|
||||||
/* Copy the bottom half of the big coefficient into place */
|
/* Copy the bottom half of the big coefficient into place */
|
||||||
for (j = 0; j < botlen; j++)
|
for (i = 0; i < botlen; i++)
|
||||||
c[toplen + j] = scratch[2*toplen + botlen + j];
|
c[toplen + i] = scratch[2*toplen + botlen + i];
|
||||||
|
|
||||||
/* Add the two small coefficients, throwing away the returned carry */
|
/* Add the two small coefficients, throwing away the returned carry */
|
||||||
internal_add(scratch, scratch + toplen, scratch, toplen);
|
internal_add(scratch, scratch + toplen, scratch, toplen);
|
||||||
@ -503,20 +503,27 @@ static void internal_mul_low(const BignumInt *a, const BignumInt *b,
|
|||||||
c, toplen);
|
c, toplen);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
int i;
|
||||||
|
BignumInt carry;
|
||||||
|
BignumDblInt t;
|
||||||
|
const BignumInt *ap, *bp;
|
||||||
|
BignumInt *cp, *cps;
|
||||||
|
|
||||||
for (j = 0; j < len; j++)
|
/*
|
||||||
c[j] = 0;
|
* Multiply in the ordinary O(N^2) way.
|
||||||
|
*/
|
||||||
|
|
||||||
for (i = len - 1; i >= 0; i--) {
|
for (i = 0; i < len; i++)
|
||||||
t = 0;
|
c[i] = 0;
|
||||||
for (j = len - 1; j >= len - i - 1; j--) {
|
|
||||||
t += MUL_WORD(a[i], (BignumDblInt) b[j]);
|
for (cps = c + len, ap = a + len; ap-- > a; cps--) {
|
||||||
t += (BignumDblInt) c[i + j + 1 - len];
|
carry = 0;
|
||||||
c[i + j + 1 - len] = (BignumInt) t;
|
for (cp = cps, bp = b + len; bp--, cp-- > c ;) {
|
||||||
t = t >> BIGNUM_INT_BITS;
|
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
|
||||||
|
*cp = (BignumInt) t;
|
||||||
|
carry = t >> BIGNUM_INT_BITS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user