1
0
mirror of https://git.tartarus.org/simon/putty.git synced 2025-01-10 09:58:01 +00:00
putty-source/windows/winmiscs.c
Simon Tatham a9763ce4ed Hardware-accelerated SHA-512 on the Arm architecture.
The NEON support for SHA-512 acceleration looks very like SHA-256,
with a pair of chained instructions to generate a 128-bit vector
register full of message schedule, and another pair to update the hash
state based on those. But since SHA-512 is twice as big in all
dimensions, those four instructions between them only account for two
rounds of it, in place of four rounds of SHA-256.

Also, it's a tighter squeeze to fit all the data needed by those
instructions into their limited number of register operands. The NEON
SHA-256 implementation was able to keep its hash state and message
schedule stored as 128-bit vectors and then pass combinations of those
vectors directly to the instructions that did the work; for SHA-512,
in several places you have to make one of the input operands to the
main instruction by combining two halves of different vectors from
your existing state. But that operation is a quick single EXT
instruction, so no trouble.

The only other problem I've found is that clang - in particular the
version on M1 macOS, but as far as I can tell, even on current trunk -
doesn't seem to implement the NEON intrinsics for the SHA-512
extension. So I had to bodge my own versions with inline assembler in
order to get my implementation to compile under clang. Hopefully at
some point in the future the gap might be filled and I can relegate
that to a backwards-compatibility hack!

This commit adds the same kind of switching mechanism for SHA-512 that
we already had for SHA-256, SHA-1 and AES, and as with all of those,
plumbs it through to testcrypt so that you can explicitly ask for the
hardware or software version of SHA-512. So the test suite can run the
standard test vectors against both implementations in turn.

On M1 macOS, I'm testing at run time for the presence of SHA-512 by
checking a sysctl setting. You can perform the same test on the
command line by running "sysctl hw.optional.armv8_2_sha512".

As far as I can tell, on Windows there is not yet any flag to test for
this CPU feature, so for the moment, the new accelerated SHA-512 is
turned off unconditionally on Windows.
2020-12-24 15:39:54 +00:00

286 lines
7.2 KiB
C

/*
* winmiscs.c: Windows-specific standalone functions. Has the same
* relationship to winmisc.c that utils.c does to misc.c, but the
* corresponding name 'winutils.c' was already taken.
*/
#include <limits.h>

#include "putty.h"
#ifndef NO_SECUREZEROMEMORY
/*
 * Windows implementation of smemclr (see misc.c): wipes n bytes starting
 * at b by calling SecureZeroMemory. A NULL pointer or a zero length makes
 * the call a no-op.
 */
void smemclr(void *b, size_t n)
{
    if (!b || n == 0)
        return;

    SecureZeroMemory(b, n);
}
#endif
#ifdef MINEFIELD
/*
* Minefield - a Windows equivalent for Electric Fence
*/
/* Page size in bytes used for all of Minefield's page arithmetic. */
#define PAGESIZE 4096
/*
 * Design:
 *
 * We start by reserving as much virtual address space as Windows
 * will sensibly (or not sensibly) let us have. We flag it all as
 * invalid memory.
 *
 * Any allocation attempt is satisfied by committing one or more
 * pages, with an uncommitted page on either side. The returned
 * memory region is jammed up against the _end_ of the pages.
 *
 * Freeing anything causes instantaneous decommitment of the pages
 * involved, so stale pointers are caught as soon as possible.
 */
/* Set to 1 at the end of minefield_init(). */
static int minefield_initialised = 0;
/* Base address of the range reserved by minefield_init(). */
static void *minefield_region = NULL;
/* Size in bytes of the reserved range. */
static long minefield_size = 0;
/* Number of pages that follow the admin block in the reserved range. */
static long minefield_npages = 0;
/* Page index at which minefield_alloc() begins its next search. */
static long minefield_curpos = 0;
/*
 * Admin block at the start of the reserved range: one two-byte entry per
 * page. 0xFFFF marks an unused page and 0xFFFE a used page in which no
 * region starts; any other value is the offset within the page at which
 * an allocated region starts.
 */
static unsigned short *minefield_admin = NULL;
/* Address of the first page after the admin block. */
static void *minefield_pages = NULL;
/*
 * Change the page protection of the admin block.
 *
 * hide: nonzero requests PAGE_NOACCESS, zero requests PAGE_READWRITE.
 *
 * VirtualProtect fails outright when its final argument (lpflOldProtect)
 * is NULL, so passing NULL left the admin block's protection unchanged.
 * A real variable is supplied to receive the previous protection, even
 * though the value is not needed afterwards. It is declared as unsigned
 * long, which is what the Windows headers define DWORD to be.
 */
static void minefield_admin_hide(int hide)
{
    int access = hide ? PAGE_NOACCESS : PAGE_READWRITE;
    unsigned long previous_access = 0;

    VirtualProtect(minefield_admin, minefield_npages * 2,
                   access, &previous_access);
}
/*
 * One-time setup of the minefield: reserve an address range, place the
 * admin block at its start, mark every page unused, hide the admin block
 * and set minefield_initialised.
 *
 * NOTE(review): if every reservation attempt fails, the loop ends with a
 * size of 0 and minefield_region still NULL, and the code below carries
 * on regardless. No failure handling is visible here - confirm whether
 * that is acceptable for this debugging allocator.
 */
static void minefield_init(void)
{
    int reserve_bytes = 0x40000000;
    int admin_bytes;
    int page;

    /*
     * Try successively smaller reservations (roughly seven eighths of the
     * previous size, masked down to a multiple of 0x1000) until one
     * succeeds or the size drops to zero.
     */
    while (reserve_bytes > 0) {
        minefield_region = VirtualAlloc(NULL, reserve_bytes,
                                        MEM_RESERVE, PAGE_NOACCESS);
        if (minefield_region != NULL)
            break;
        reserve_bytes = ((reserve_bytes >> 3) * 7) & ~0xFFF;
    }
    minefield_size = reserve_bytes;

    /*
     * The admin block lives at the start of the range and needs two bytes
     * per page, rounded up to a whole number of pages. Whatever remains
     * after it is the pool of allocatable pages.
     */
    minefield_admin = minefield_region;
    minefield_npages = minefield_size / PAGESIZE;
    admin_bytes = (minefield_npages * 2 + PAGESIZE - 1) & ~(PAGESIZE - 1);
    minefield_npages = (minefield_size - admin_bytes) / PAGESIZE;
    minefield_pages = (char *) minefield_region + admin_bytes;

    /* Commit the admin block so that it can be written to. */
    VirtualAlloc(minefield_admin, minefield_npages * 2,
                 MEM_COMMIT, PAGE_READWRITE);

    /* 0xFFFF in an admin entry means "page unused". */
    page = 0;
    while (page < minefield_npages) {
        minefield_admin[page] = 0xFFFF;
        page++;
    }

    /* Hide the admin block again until it is next needed. */
    minefield_admin_hide(1);

    minefield_initialised = 1;
}
/*
 * Called by minefield_free() and minefield_get_size() when the pointer
 * they were given does not match the admin records. Reads an int from the
 * very start of minefield_pages and passes it to div() as the divisor.
 *
 * NOTE(review): presumably the intent is to crash on the spot, either on
 * the read itself or on a division by zero - confirm. Callers do not rely
 * on this function returning or not returning; they simply continue if it
 * does return.
 */
static void minefield_bomb(void)
{
div(1, *(int *) minefield_pages);
}
/*
 * Allocate a region of `size` bytes from the minefield.
 *
 * The request is rounded up to npages whole pages, and a run of npages+2
 * consecutive unused pages is searched for: first from minefield_curpos
 * to the end of the page array, then (once) from page 0 up to
 * minefield_curpos. The first and last page of the run are left unused;
 * the region is placed so that it ends exactly at the end of the last
 * page it uses.
 *
 * Returns a pointer to the region, or NULL if no suitable run was found.
 *
 * NOTE(review): `size` is a signed int and negative values are not
 * rejected here. Also, if minefield_npages is 0 the "go round" branch
 * looks as though it would be taken on every iteration - confirm.
 */
static void *minefield_alloc(int size)
{
int npages;
int pos, lim, region_end, region_start;
int start;
int i;
/* Number of whole pages needed to hold `size` bytes. */
npages = (size + PAGESIZE - 1) / PAGESIZE;
/* Make the admin block accessible while we work on it. */
minefield_admin_hide(0);
/*
 * Search from current position until we find a contiguous
 * bunch of npages+2 unused pages.
 */
pos = minefield_curpos;
lim = minefield_npages;
while (1) {
/* Skip over used pages. */
while (pos < lim && minefield_admin[pos] != 0xFFFF)
pos++;
/* Count unused pages, stopping once npages+2 have been seen. */
start = pos;
while (pos < lim && pos - start < npages + 2 &&
minefield_admin[pos] == 0xFFFF)
pos++;
if (pos - start == npages + 2)
break;
/* If we've reached the limit, reset the limit or stop. */
if (pos >= lim) {
if (lim == minefield_npages) {
/* go round and start again at zero */
lim = minefield_curpos;
pos = 0;
} else {
/* Second pass also failed: re-hide the admin block and give up. */
minefield_admin_hide(1);
return NULL;
}
}
}
/*
 * pos is now one past the run, so pos - 1 is the trailing unused page;
 * the next search starts from there.
 */
minefield_curpos = pos - 1;
/*
 * We have npages+2 unused pages starting at start. We leave
 * the first and last of these alone and use the rest.
 */
/* Byte offsets are relative to minefield_pages. */
region_end = (start + npages + 1) * PAGESIZE;
region_start = region_end - size;
/* FIXME: could align here if we wanted */
/*
 * Update the admin region: the first used page records the offset
 * within that page at which the region starts, and any further used
 * pages are marked 0xFFFE.
 */
for (i = start + 2; i < start + npages + 1; i++)
minefield_admin[i] = 0xFFFE; /* used but no region starts here */
minefield_admin[start + 1] = region_start % PAGESIZE;
minefield_admin_hide(1);
/*
 * Commit the memory backing the region. The result of this call is not
 * checked.
 */
VirtualAlloc((char *) minefield_pages + region_start, size,
MEM_COMMIT, PAGE_READWRITE);
return (char *) minefield_pages + region_start;
}
/*
 * Release the region starting at ptr.
 *
 * ptr is checked against the admin records: its page must lie inside the
 * page array and the recorded start offset for that page must match. On a
 * mismatch minefield_bomb() is called. The region's admin entries are
 * then reset to 0xFFFF (unused) and its memory is decommitted.
 */
static void minefield_free(void *ptr)
{
    int offset, first_page, page;

    minefield_admin_hide(0);

    /* Byte offset of ptr from the first allocatable page. */
    offset = (char *) ptr - (char *) minefield_pages;
    first_page = offset / PAGESIZE;

    if (first_page < 0 || first_page >= minefield_npages ||
        minefield_admin[first_page] != offset % PAGESIZE) {
        minefield_bomb();
    }

    /* Mark every page up to the next unused one as unused. */
    page = first_page;
    while (page < minefield_npages && minefield_admin[page] != 0xFFFF) {
        minefield_admin[page] = 0xFFFF;
        page++;
    }

    /* Decommit from ptr to the end of the last page just released. */
    VirtualFree(ptr, page * PAGESIZE - offset, MEM_DECOMMIT);

    minefield_admin_hide(1);
}
/*
 * Return the size in bytes of the region starting at ptr, measured from
 * ptr to the end of the last consecutive used page.
 *
 * ptr is validated against the admin records in the same way as in
 * minefield_free(); minefield_bomb() is called on a mismatch.
 */
static int minefield_get_size(void *ptr)
{
    int offset, first_page, end_page;

    minefield_admin_hide(0);

    /* Byte offset of ptr from the first allocatable page. */
    offset = (char *) ptr - (char *) minefield_pages;
    first_page = offset / PAGESIZE;

    if (first_page < 0 || first_page >= minefield_npages ||
        minefield_admin[first_page] != offset % PAGESIZE) {
        minefield_bomb();
    }

    /* Walk forward to the first unused page (or the end of the array). */
    end_page = first_page;
    while (end_page < minefield_npages &&
           minefield_admin[end_page] != 0xFFFF) {
        end_page++;
    }

    minefield_admin_hide(1);

    return end_page * PAGESIZE - offset;
}
/*
 * malloc replacement backed by the minefield.
 *
 * Initialises the minefield on first use, then hands the request to
 * minefield_alloc(). Returns the new region, or NULL if the request
 * cannot be satisfied.
 *
 * minefield_alloc() takes a signed int, so a large size_t would be
 * silently truncated (possibly to a negative number) if passed straight
 * through. Requests above INT_MAX / 2 are refused up front: that keeps
 * minefield_alloc()'s page-rounding arithmetic within int range, and
 * minefield_init() never reserves more than 0x40000000 bytes in the
 * first place.
 */
void *minefield_c_malloc(size_t size)
{
    if (!minefield_initialised)
        minefield_init();

    if (size > (size_t) (INT_MAX / 2))
        return NULL;

    return minefield_alloc((int) size);
}
/*
 * free replacement backed by the minefield: initialises the minefield if
 * that has not happened yet, then releases p via minefield_free().
 */
void minefield_c_free(void *p)
{
    if (minefield_initialised == 0) {
        minefield_init();
    }

    minefield_free(p);
}
/*
 * realloc _always_ moves the chunk, for rapid detection of code
 * that assumes it won't.
 *
 * A new region of `size` bytes is allocated, the smaller of the old and
 * new sizes is copied across, and the old region is freed.
 *
 * Returns the new region, or NULL if it could not be allocated. In that
 * case p is left untouched and still valid, as with standard realloc.
 *
 * Requests above INT_MAX / 2 are refused up front, because
 * minefield_alloc() takes a signed int and a larger size_t would be
 * silently truncated on the way in.
 */
void *minefield_c_realloc(void *p, size_t size)
{
    size_t oldsize;
    void *q;

    if (!minefield_initialised)
        minefield_init();

    if (size > (size_t) (INT_MAX / 2))
        return NULL;

    q = minefield_alloc((int) size);
    if (!q)
        return NULL;                   /* do not copy into a NULL block */

    oldsize = minefield_get_size(p);
    memcpy(q, p, (oldsize < size ? oldsize : size));
    minefield_free(p);
    return q;
}
#endif /* MINEFIELD */
#if defined _MSC_VER && _MSC_VER < 1800
/*
 * Older MSVC libraries have no strtoumax, so supply one here that
 * forwards its arguments to _strtoui64.
 */
uintmax_t strtoumax(const char *nptr, char **endptr, int base)
{
    uintmax_t parsed = _strtoui64(nptr, endptr, base);

    return parsed;
}
#endif
#if defined _M_ARM || defined _M_ARM64
/*
 * Report whether hardware AES may be used: true exactly when
 * IsProcessorFeaturePresent reports
 * PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE.
 */
bool platform_aes_hw_available(void)
{
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
        return true;

    return false;
}
/*
 * Report whether hardware SHA-256 may be used: true exactly when
 * IsProcessorFeaturePresent reports
 * PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE.
 */
bool platform_sha256_hw_available(void)
{
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
        return true;

    return false;
}
/*
 * Report whether hardware SHA-1 may be used: true exactly when
 * IsProcessorFeaturePresent reports
 * PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE.
 */
bool platform_sha1_hw_available(void)
{
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
        return true;

    return false;
}
/*
 * Hardware SHA-512 is never reported as available here.
 *
 * As of 2020-12-24, as far as the author could tell from
 * docs.microsoft.com, Windows on Arm does not yet provide a PF_ARM_V8_*
 * flag for the SHA-512 architecture extension, so there is nothing to
 * query.
 */
bool platform_sha512_hw_available(void)
{
    return false;
}
#endif
/*
 * Return true if GetConsoleMode succeeds on the given handle, and false
 * otherwise. The mode value that GetConsoleMode writes back is discarded.
 */
bool is_console_handle(HANDLE handle)
{
    DWORD mode_unused;

    return GetConsoleMode(handle, &mode_unused) ? true : false;
}