putty-source/unix/uxutils.h

/*
 * uxutils.h: header included only by uxutils.c.
 *
 * The only reason this is a header file instead of a source file is
 * so that I can define 'static inline' functions which may or may not
 * be used, without provoking a compiler warning when I turn out not
 * to use them in the subsequent source file.
 */

#ifndef PUTTY_UXUTILS_H
#define PUTTY_UXUTILS_H

#if defined __APPLE__
#ifdef HAVE_SYS_SYSCTL_H
#include <sys/sysctl.h>
#endif
#endif /* defined __APPLE__ */

#if defined __arm__ || defined __aarch64__

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#ifdef HAVE_SYS_AUXV_H
#include <sys/auxv.h>
#endif

#ifdef HAVE_ASM_HWCAP_H
#include <asm/hwcap.h>
#endif

#if defined HAVE_GETAUXVAL
/* No code needed: getauxval has just the API we want already */
#elif defined HAVE_ELF_AUX_INFO
/*
 * getauxval substitute for FreeBSD, which provides elf_aux_info
 * rather than getauxval itself.
 *
 * Passes 'which' straight through to elf_aux_info and returns the
 * value it stores. If elf_aux_info returns anything other than 0
 * (i.e. it failed), the result is 0.
 */
static inline u_long getauxval(int which)
{
    u_long value;
    int status = elf_aux_info(which, &value, sizeof(value));

    /* 'value' is only meaningful when the call succeeded */
    return status == 0 ? value : 0;
}
#else
/*
 * Stub getauxval, used when neither a real getauxval nor
 * elf_aux_info is available. It ignores 'which' and always returns
 * 0, i.e. it reports no capabilities at all.
 */
static inline u_long getauxval(int which)
{
    (void)which;                       /* deliberately unused */
    return 0;
}
#endif

#endif /* defined __arm__ || defined __aarch64__ */

#if defined __APPLE__
/*
 * Query a sysctl setting by name and report whether it is a nonzero
 * int.
 *
 * Returns true only if sysctlbyname succeeds for 'flagname', the
 * size it reports back is exactly sizeof(int), and the value read is
 * nonzero. Any failure, size mismatch or zero value gives false.
 *
 * If HAVE_SYSCTLBYNAME is not defined, no query is made and the
 * answer is always false.
 */
static inline bool test_sysctl_flag(const char *flagname)
{
#ifdef HAVE_SYSCTLBYNAME
    int flag;
    size_t len = sizeof(flag);

    if (sysctlbyname(flagname, &flag, &len, NULL, 0) != 0)
        return false;                  /* lookup failed */
    if (len != sizeof(flag))
        return false;                  /* not an int-sized setting */
    return flag != 0;
#else /* HAVE_SYSCTLBYNAME */
    (void)flagname;                    /* nothing to query */
    return false;
#endif /* HAVE_SYSCTLBYNAME */
}
#endif /* defined __APPLE__ */

#endif /* PUTTY_UXUTILS_H */
uxutils.c: move some definitions into a header file. If the autoconf/ifdef system ends up taking the trivial branch through all the Arm-architecture ifdefs, then we define the always-fail version of getauxval as a 'static inline' function, and then (because none of our desired HWCAP_FOO values is defined at all) never call it. This leads to a compiler warning because we defined a static function and never called it - i.e. at the default -Werror, a build failure. Of course it's perfectly sensible to define a static inline function that never gets called! Header files do it all the time, and nobody is expected to ensure that if they include a header file then they take care to refer to every static inline function it defines. But if the definition is in the _source_ file rather than a header file, then clang (in particular on macOS) will give a warning. So the easy solution is to move the inline definitions of getauxval into a header file, which suppresses the warning without requiring me to faff about with further ifdefs to make the definitions conditional on at least one use. 2020-12-24 09:34:13 +00:00			`/*`
			`* uxutils.h: header included only by uxutils.c.`
			`*`
			`* The only reason this is a header file instead of a source file is`
			`* so that I can define 'static inline' functions which may or may not`
			`* be used, without provoking a compiler warning when I turn out not`
			`* to use them in the subsequent source file.`
			`*/`

			`#ifndef PUTTY_UXUTILS_H`
			`#define PUTTY_UXUTILS_H`

Include <sys/sysctl.h> for Intel builds 2020-12-24 22:33:09 +00:00			`#if defined __APPLE__`
			`#ifdef HAVE_SYS_SYSCTL_H`
			`#include <sys/sysctl.h>`
			`#endif`
			`#endif /* defined __APPLE__ */`

uxutils.c: move some definitions into a header file. If the autoconf/ifdef system ends up taking the trivial branch through all the Arm-architecture ifdefs, then we define the always-fail version of getauxval as a 'static inline' function, and then (because none of our desired HWCAP_FOO values is defined at all) never call it. This leads to a compiler warning because we defined a static function and never called it - i.e. at the default -Werror, a build failure. Of course it's perfectly sensible to define a static inline function that never gets called! Header files do it all the time, and nobody is expected to ensure that if they include a header file then they take care to refer to every static inline function it defines. But if the definition is in the _source_ file rather than a header file, then clang (in particular on macOS) will give a warning. So the easy solution is to move the inline definitions of getauxval into a header file, which suppresses the warning without requiring me to faff about with further ifdefs to make the definitions conditional on at least one use. 2020-12-24 09:34:13 +00:00			`#if defined __arm__ || defined __aarch64__`

			`#ifdef HAVE_SYS_TYPES_H`
			`#include <sys/types.h>`
			`#endif`

			`#ifdef HAVE_SYS_AUXV_H`
			`#include <sys/auxv.h>`
			`#endif`

			`#ifdef HAVE_ASM_HWCAP_H`
			`#include <asm/hwcap.h>`
			`#endif`

			`#if defined HAVE_GETAUXVAL`
			`/* No code needed: getauxval has just the API we want already */`
			`#elif defined HAVE_ELF_AUX_INFO`
			`/* Implement the simple getauxval API in terms of FreeBSD elf_aux_info */`
			`static inline u_long getauxval(int which)`
			`{`
			`u_long toret;`
			`if (elf_aux_info(which, &toret, sizeof(toret)) != 0)`
			`return 0; /* elf_aux_info didn't work */`
			`return toret;`
			`}`
			`#else`
			`/* Implement a stub getauxval which returns no capabilities */`
			`static inline u_long getauxval(int which) { return 0; }`
			`#endif`

			`#endif /* defined __arm__ || defined __aarch64__ */`

Hardware-accelerated SHA-512 on the Arm architecture. The NEON support for SHA-512 acceleration looks very like SHA-256, with a pair of chained instructions to generate a 128-bit vector register full of message schedule, and another pair to update the hash state based on those. But since SHA-512 is twice as big in all dimensions, those four instructions between them only account for two rounds of it, in place of four rounds of SHA-256. Also, it's a tighter squeeze to fit all the data needed by those instructions into their limited number of register operands. The NEON SHA-256 implementation was able to keep its hash state and message schedule stored as 128-bit vectors and then pass combinations of those vectors directly to the instructions that did the work; for SHA-512, in several places you have to make one of the input operands to the main instruction by combining two halves of different vectors from your existing state. But that operation is a quick single EXT instruction, so no trouble. The only other problem I've found is that clang - in particular the version on M1 macOS, but as far as I can tell, even on current trunk - doesn't seem to implement the NEON intrinsics for the SHA-512 extension. So I had to bodge my own versions with inline assembler in order to get my implementation to compile under clang. Hopefully at some point in the future the gap might be filled and I can relegate that to a backwards-compatibility hack! This commit adds the same kind of switching mechanism for SHA-512 that we already had for SHA-256, SHA-1 and AES, and as with all of those, plumbs it through to testcrypt so that you can explicitly ask for the hardware or software version of SHA-512. So the test suite can run the standard test vectors against both implementations in turn. On M1 macOS, I'm testing at run time for the presence of SHA-512 by checking a sysctl setting. You can perform the same test on the command line by running "sysctl hw.optional.armv8_2_sha512". 
As far as I can tell, on Windows there is not yet any flag to test for this CPU feature, so for the moment, the new accelerated SHA-512 is turned off unconditionally on Windows. 2020-12-24 11:40:15 +00:00			`#if defined __APPLE__`
			`static inline bool test_sysctl_flag(const char *flagname)`
			`{`
Fix build failure on Intel Macs. sysctlbyname() turns out to be a new library function, so we can't assume it's present just because defined __APPLE__. Add an autoconf check to see if it's really there, before trying to call it. 2020-12-24 20:45:28 +00:00			`#ifdef HAVE_SYSCTLBYNAME`
Hardware-accelerated SHA-512 on the Arm architecture. The NEON support for SHA-512 acceleration looks very like SHA-256, with a pair of chained instructions to generate a 128-bit vector register full of message schedule, and another pair to update the hash state based on those. But since SHA-512 is twice as big in all dimensions, those four instructions between them only account for two rounds of it, in place of four rounds of SHA-256. Also, it's a tighter squeeze to fit all the data needed by those instructions into their limited number of register operands. The NEON SHA-256 implementation was able to keep its hash state and message schedule stored as 128-bit vectors and then pass combinations of those vectors directly to the instructions that did the work; for SHA-512, in several places you have to make one of the input operands to the main instruction by combining two halves of different vectors from your existing state. But that operation is a quick single EXT instruction, so no trouble. The only other problem I've found is that clang - in particular the version on M1 macOS, but as far as I can tell, even on current trunk - doesn't seem to implement the NEON intrinsics for the SHA-512 extension. So I had to bodge my own versions with inline assembler in order to get my implementation to compile under clang. Hopefully at some point in the future the gap might be filled and I can relegate that to a backwards-compatibility hack! This commit adds the same kind of switching mechanism for SHA-512 that we already had for SHA-256, SHA-1 and AES, and as with all of those, plumbs it through to testcrypt so that you can explicitly ask for the hardware or software version of SHA-512. So the test suite can run the standard test vectors against both implementations in turn. On M1 macOS, I'm testing at run time for the presence of SHA-512 by checking a sysctl setting. You can perform the same test on the command line by running "sysctl hw.optional.armv8_2_sha512". 
As far as I can tell, on Windows there is not yet any flag to test for this CPU feature, so for the moment, the new accelerated SHA-512 is turned off unconditionally on Windows. 2020-12-24 11:40:15 +00:00			`int value;`
			`size_t size = sizeof(value);`
			`return (sysctlbyname(flagname, &value, &size, NULL, 0) == 0 &&`
			`size == sizeof(value) && value != 0);`
Fix build failure on Intel Macs. sysctlbyname() turns out to be a new library function, so we can't assume it's present just because defined __APPLE__. Add an autoconf check to see if it's really there, before trying to call it. 2020-12-24 20:45:28 +00:00			`#else /* HAVE_SYSCTLBYNAME */`
			`return false;`
			`#endif /* HAVE_SYSCTLBYNAME */`
Hardware-accelerated SHA-512 on the Arm architecture. The NEON support for SHA-512 acceleration looks very like SHA-256, with a pair of chained instructions to generate a 128-bit vector register full of message schedule, and another pair to update the hash state based on those. But since SHA-512 is twice as big in all dimensions, those four instructions between them only account for two rounds of it, in place of four rounds of SHA-256. Also, it's a tighter squeeze to fit all the data needed by those instructions into their limited number of register operands. The NEON SHA-256 implementation was able to keep its hash state and message schedule stored as 128-bit vectors and then pass combinations of those vectors directly to the instructions that did the work; for SHA-512, in several places you have to make one of the input operands to the main instruction by combining two halves of different vectors from your existing state. But that operation is a quick single EXT instruction, so no trouble. The only other problem I've found is that clang - in particular the version on M1 macOS, but as far as I can tell, even on current trunk - doesn't seem to implement the NEON intrinsics for the SHA-512 extension. So I had to bodge my own versions with inline assembler in order to get my implementation to compile under clang. Hopefully at some point in the future the gap might be filled and I can relegate that to a backwards-compatibility hack! This commit adds the same kind of switching mechanism for SHA-512 that we already had for SHA-256, SHA-1 and AES, and as with all of those, plumbs it through to testcrypt so that you can explicitly ask for the hardware or software version of SHA-512. So the test suite can run the standard test vectors against both implementations in turn. On M1 macOS, I'm testing at run time for the presence of SHA-512 by checking a sysctl setting. You can perform the same test on the command line by running "sysctl hw.optional.armv8_2_sha512". 
As far as I can tell, on Windows there is not yet any flag to test for this CPU feature, so for the moment, the new accelerated SHA-512 is turned off unconditionally on Windows. 2020-12-24 11:40:15 +00:00			`}`
			`#endif /* defined __APPLE__ */`

uxutils.c: move some definitions into a header file. If the autoconf/ifdef system ends up taking the trivial branch through all the Arm-architecture ifdefs, then we define the always-fail version of getauxval as a 'static inline' function, and then (because none of our desired HWCAP_FOO values is defined at all) never call it. This leads to a compiler warning because we defined a static function and never called it - i.e. at the default -Werror, a build failure. Of course it's perfectly sensible to define a static inline function that never gets called! Header files do it all the time, and nobody is expected to ensure that if they include a header file then they take care to refer to every static inline function it defines. But if the definition is in the _source_ file rather than a header file, then clang (in particular on macOS) will give a warning. So the easy solution is to move the inline definitions of getauxval into a header file, which suppresses the warning without requiring me to faff about with further ifdefs to make the definitions conditional on at least one use. 2020-12-24 09:34:13 +00:00			`#endif /* PUTTY_UXUTILS_H */`