#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512) && \
defined(__clang__) && __clang_major__ >= 7
#define __ARM_FEATURE_SHA512 …
#define MBEDTLS_ENABLE_ARM_SHA3_EXTENSIONS_COMPILER_FLAG
#endif
#include "common.h"
#if defined(MBEDTLS_SHA512_C) || defined(MBEDTLS_SHA384_C)
#include "mbedtls/sha512.h"
#include "mbedtls/platform_util.h"
#include "mbedtls/error.h"
#if defined(_MSC_VER) || defined(__WATCOMC__)
#define UL64 …
#else
#define UL64(x) …
#endif
#include <string.h>
#include "mbedtls/platform.h"
#if defined(__aarch64__)
# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
# if !defined(MBEDTLS_HAVE_NEON_INTRINSICS)
# error "Target does not support NEON instructions"
# endif
# if !defined(__ARM_FEATURE_SHA512) || defined(MBEDTLS_ENABLE_ARM_SHA3_EXTENSIONS_COMPILER_FLAG)
# if defined(__ARMCOMPILER_VERSION)
# if __ARMCOMPILER_VERSION < 6090000
# error "A more recent armclang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
# elif __ARMCOMPILER_VERSION == 6090000
# error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
# else
# pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function)
#define MBEDTLS_POP_TARGET_PRAGMA
# endif
# elif defined(__clang__)
# if __clang_major__ < 7
# error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
# else
# pragma clang attribute push (__attribute__((target("sha3"))), apply_to=function)
#define MBEDTLS_POP_TARGET_PRAGMA
# endif
# elif defined(__GNUC__)
# if __GNUC__ < 8
# error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
# else
# pragma GCC push_options
# pragma GCC target ("arch=armv8.2-a+sha3")
#define MBEDTLS_POP_TARGET_PRAGMA
# endif
# else
# error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
# endif
# endif
# endif
# if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
# if defined(__unix__)
# if defined(__linux__)
# include <sys/auxv.h>
# if !defined(HWCAP_SHA512)
#define HWCAP_SHA512 …
# endif
# endif
# include <signal.h>
# endif
# endif
#elif !defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64)
# undef MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY
# undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
#endif
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
#if defined(HWCAP_SHA512)
static int mbedtls_a64_crypto_sha512_determine_support(void)
{
return (getauxval(AT_HWCAP) & HWCAP_SHA512) ? 1 : 0;
}
#elif defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
static int mbedtls_a64_crypto_sha512_determine_support(void)
{
int value = 0;
size_t value_len = sizeof(value);
int ret = sysctlbyname("hw.optional.armv8_2_sha512", &value, &value_len,
NULL, 0);
return ret == 0 && value != 0;
}
#elif defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64)
#if defined(_MSC_VER)
#pragma message "No mechanism to detect A64_CRYPTO found, using C code only"
#else
#warning "No mechanism to detect A64_CRYPTO found, using C code only"
#endif
#elif defined(__unix__) && defined(SIG_SETMASK)
#include <signal.h>
#include <setjmp.h>
static jmp_buf return_from_sigill;
static void sigill_handler(int signal)
{
(void) signal;
longjmp(return_from_sigill, 1);
}
static int mbedtls_a64_crypto_sha512_determine_support(void)
{
struct sigaction old_action, new_action;
sigset_t old_mask;
if (sigprocmask(0, NULL, &old_mask)) {
return 0;
}
sigemptyset(&new_action.sa_mask);
new_action.sa_flags = 0;
new_action.sa_handler = sigill_handler;
sigaction(SIGILL, &new_action, &old_action);
static int ret = 0;
if (setjmp(return_from_sigill) == 0) {
asm ("sha512h q0, q0, v0.2d" : : : "v0");
ret = 1;
}
sigaction(SIGILL, &old_action, NULL);
sigprocmask(SIG_SETMASK, &old_mask, NULL);
return ret;
}
#else
#warning "No mechanism to detect A64_CRYPTO found, using C code only"
#undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
#endif
#endif
#if !defined(MBEDTLS_SHA512_ALT)
#define SHA512_BLOCK_SIZE …
#if defined(MBEDTLS_SHA512_SMALLER)
static void sha512_put_uint64_be(uint64_t n, unsigned char *b, uint8_t i)
{
MBEDTLS_PUT_UINT64_BE(n, b, i);
}
#else
#define sha512_put_uint64_be …
#endif
void mbedtls_sha512_init(mbedtls_sha512_context *ctx)
{ … }
void mbedtls_sha512_free(mbedtls_sha512_context *ctx)
{ … }
void mbedtls_sha512_clone(mbedtls_sha512_context *dst,
const mbedtls_sha512_context *src)
{ … }
int mbedtls_sha512_starts(mbedtls_sha512_context *ctx, int is384)
{ … }
#if !defined(MBEDTLS_SHA512_PROCESS_ALT)
static const uint64_t K[80] = …;
#endif
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
#define mbedtls_internal_sha512_process_many_a64_crypto …
#define mbedtls_internal_sha512_process_a64_crypto …
#endif
#if defined(__clang__) && \
(__clang_major__ < 13 || \
(__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0))
static inline uint64x2_t vsha512su0q_u64(uint64x2_t x, uint64x2_t y)
{
asm ("sha512su0 %0.2D,%1.2D" : "+w" (x) : "w" (y));
return x;
}
static inline uint64x2_t vsha512su1q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
{
asm ("sha512su1 %0.2D,%1.2D,%2.2D" : "+w" (x) : "w" (y), "w" (z));
return x;
}
static inline uint64x2_t vsha512hq_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
{
asm ("sha512h %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z));
return x;
}
static inline uint64x2_t vsha512h2q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
{
asm ("sha512h2 %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z));
return x;
}
#endif
static size_t mbedtls_internal_sha512_process_many_a64_crypto(
mbedtls_sha512_context *ctx, const uint8_t *msg, size_t len)
{
uint64x2_t ab = vld1q_u64(&ctx->state[0]);
uint64x2_t cd = vld1q_u64(&ctx->state[2]);
uint64x2_t ef = vld1q_u64(&ctx->state[4]);
uint64x2_t gh = vld1q_u64(&ctx->state[6]);
size_t processed = 0;
for (;
len >= SHA512_BLOCK_SIZE;
processed += SHA512_BLOCK_SIZE,
msg += SHA512_BLOCK_SIZE,
len -= SHA512_BLOCK_SIZE) {
uint64x2_t initial_sum, sum, intermed;
uint64x2_t ab_orig = ab;
uint64x2_t cd_orig = cd;
uint64x2_t ef_orig = ef;
uint64x2_t gh_orig = gh;
uint64x2_t s0 = (uint64x2_t) vld1q_u8(msg + 16 * 0);
uint64x2_t s1 = (uint64x2_t) vld1q_u8(msg + 16 * 1);
uint64x2_t s2 = (uint64x2_t) vld1q_u8(msg + 16 * 2);
uint64x2_t s3 = (uint64x2_t) vld1q_u8(msg + 16 * 3);
uint64x2_t s4 = (uint64x2_t) vld1q_u8(msg + 16 * 4);
uint64x2_t s5 = (uint64x2_t) vld1q_u8(msg + 16 * 5);
uint64x2_t s6 = (uint64x2_t) vld1q_u8(msg + 16 * 6);
uint64x2_t s7 = (uint64x2_t) vld1q_u8(msg + 16 * 7);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
s0 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s0)));
s1 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s1)));
s2 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s2)));
s3 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s3)));
s4 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s4)));
s5 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s5)));
s6 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s6)));
s7 = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(s7)));
#endif
initial_sum = vaddq_u64(s0, vld1q_u64(&K[0]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
gh = vsha512h2q_u64(intermed, cd, ab);
cd = vaddq_u64(cd, intermed);
initial_sum = vaddq_u64(s1, vld1q_u64(&K[2]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
ef = vsha512h2q_u64(intermed, ab, gh);
ab = vaddq_u64(ab, intermed);
initial_sum = vaddq_u64(s2, vld1q_u64(&K[4]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
cd = vsha512h2q_u64(intermed, gh, ef);
gh = vaddq_u64(gh, intermed);
initial_sum = vaddq_u64(s3, vld1q_u64(&K[6]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
ab = vsha512h2q_u64(intermed, ef, cd);
ef = vaddq_u64(ef, intermed);
initial_sum = vaddq_u64(s4, vld1q_u64(&K[8]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
gh = vsha512h2q_u64(intermed, cd, ab);
cd = vaddq_u64(cd, intermed);
initial_sum = vaddq_u64(s5, vld1q_u64(&K[10]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
ef = vsha512h2q_u64(intermed, ab, gh);
ab = vaddq_u64(ab, intermed);
initial_sum = vaddq_u64(s6, vld1q_u64(&K[12]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
cd = vsha512h2q_u64(intermed, gh, ef);
gh = vaddq_u64(gh, intermed);
initial_sum = vaddq_u64(s7, vld1q_u64(&K[14]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
ab = vsha512h2q_u64(intermed, ef, cd);
ef = vaddq_u64(ef, intermed);
for (unsigned int t = 16; t < 80; t += 16) {
s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1));
initial_sum = vaddq_u64(s0, vld1q_u64(&K[t]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
gh = vsha512h2q_u64(intermed, cd, ab);
cd = vaddq_u64(cd, intermed);
s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1));
initial_sum = vaddq_u64(s1, vld1q_u64(&K[t + 2]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
ef = vsha512h2q_u64(intermed, ab, gh);
ab = vaddq_u64(ab, intermed);
s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1));
initial_sum = vaddq_u64(s2, vld1q_u64(&K[t + 4]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
cd = vsha512h2q_u64(intermed, gh, ef);
gh = vaddq_u64(gh, intermed);
s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1));
initial_sum = vaddq_u64(s3, vld1q_u64(&K[t + 6]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
ab = vsha512h2q_u64(intermed, ef, cd);
ef = vaddq_u64(ef, intermed);
s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1));
initial_sum = vaddq_u64(s4, vld1q_u64(&K[t + 8]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
gh = vsha512h2q_u64(intermed, cd, ab);
cd = vaddq_u64(cd, intermed);
s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1));
initial_sum = vaddq_u64(s5, vld1q_u64(&K[t + 10]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
ef = vsha512h2q_u64(intermed, ab, gh);
ab = vaddq_u64(ab, intermed);
s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1));
initial_sum = vaddq_u64(s6, vld1q_u64(&K[t + 12]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
cd = vsha512h2q_u64(intermed, gh, ef);
gh = vaddq_u64(gh, intermed);
s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1));
initial_sum = vaddq_u64(s7, vld1q_u64(&K[t + 14]));
sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
ab = vsha512h2q_u64(intermed, ef, cd);
ef = vaddq_u64(ef, intermed);
}
ab = vaddq_u64(ab, ab_orig);
cd = vaddq_u64(cd, cd_orig);
ef = vaddq_u64(ef, ef_orig);
gh = vaddq_u64(gh, gh_orig);
}
vst1q_u64(&ctx->state[0], ab);
vst1q_u64(&ctx->state[2], cd);
vst1q_u64(&ctx->state[4], ef);
vst1q_u64(&ctx->state[6], gh);
return processed;
}
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
static
#endif
int mbedtls_internal_sha512_process_a64_crypto(mbedtls_sha512_context *ctx,
const unsigned char data[SHA512_BLOCK_SIZE])
{
return (mbedtls_internal_sha512_process_many_a64_crypto(ctx, data,
SHA512_BLOCK_SIZE) ==
SHA512_BLOCK_SIZE) ? 0 : -1;
}
#endif
#if defined(MBEDTLS_POP_TARGET_PRAGMA)
#if defined(__clang__)
#pragma clang attribute pop
#elif defined(__GNUC__)
#pragma GCC pop_options
#endif
#undef MBEDTLS_POP_TARGET_PRAGMA
#endif
#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
#define mbedtls_internal_sha512_process_many_c …
#define mbedtls_internal_sha512_process_c …
#endif
#if !defined(MBEDTLS_SHA512_PROCESS_ALT) && !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
static
#endif
int mbedtls_internal_sha512_process_c(mbedtls_sha512_context *ctx,
const unsigned char data[SHA512_BLOCK_SIZE])
{ … }
#endif
#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
static size_t mbedtls_internal_sha512_process_many_c(
mbedtls_sha512_context *ctx, const uint8_t *data, size_t len)
{ … }
#endif
#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
static int mbedtls_a64_crypto_sha512_has_support(void)
{
static int done = 0;
static int supported = 0;
if (!done) {
supported = mbedtls_a64_crypto_sha512_determine_support();
done = 1;
}
return supported;
}
static size_t mbedtls_internal_sha512_process_many(mbedtls_sha512_context *ctx,
const uint8_t *msg, size_t len)
{
if (mbedtls_a64_crypto_sha512_has_support()) {
return mbedtls_internal_sha512_process_many_a64_crypto(ctx, msg, len);
} else {
return mbedtls_internal_sha512_process_many_c(ctx, msg, len);
}
}
int mbedtls_internal_sha512_process(mbedtls_sha512_context *ctx,
const unsigned char data[SHA512_BLOCK_SIZE])
{
if (mbedtls_a64_crypto_sha512_has_support()) {
return mbedtls_internal_sha512_process_a64_crypto(ctx, data);
} else {
return mbedtls_internal_sha512_process_c(ctx, data);
}
}
#endif
int mbedtls_sha512_update(mbedtls_sha512_context *ctx,
const unsigned char *input,
size_t ilen)
{ … }
int mbedtls_sha512_finish(mbedtls_sha512_context *ctx,
unsigned char *output)
{ … }
#endif
int mbedtls_sha512(const unsigned char *input,
size_t ilen,
unsigned char *output,
int is384)
{ … }
#if defined(MBEDTLS_SELF_TEST)
static const unsigned char sha_test_buf[3][113] = …;
static const size_t sha_test_buflen[3] = …;
sha_test_sum_t;
#if defined(MBEDTLS_SHA384_C)
static sha_test_sum_t sha384_test_sum[] = …;
#endif
#if defined(MBEDTLS_SHA512_C)
static sha_test_sum_t sha512_test_sum[] = …;
#endif
static int mbedtls_sha512_common_self_test(int verbose, int is384)
{ … }
#if defined(MBEDTLS_SHA512_C)
int mbedtls_sha512_self_test(int verbose)
{ … }
#endif
#if defined(MBEDTLS_SHA384_C)
int mbedtls_sha384_self_test(int verbose)
{ … }
#endif
#undef ARRAY_LENGTH
#endif
#endif