#include "Precomp.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
/* Compiler capability check for the x86 / x64 hardware SHA code path.
   Each branch defines USE_HW_SHA when the compiler version is high enough.
   The clang and GCC branches additionally define ATTRIB_SHA when __SHA__ is
   not already defined; ATTRIB_SHA is later placed in front of the
   Sha256_UpdateBlocks_HW definition. Its expansion is not visible here
   (presumably a per-function target attribute - confirm). */
#if defined(__clang__)
#if (__clang_major__ >= 8)
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA …
/* clang that also defines _MSC_VER (presumably clang-cl): include
   <tmmintrin.h> and define __SHA__ by hand.
   NOTE(review): presumably this makes the intrinsic headers expose the SHA
   intrinsics without a command-line switch - confirm. */
#if defined(_MSC_VER)
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#endif
#elif defined(__GNUC__)
/* GCC major version 8 or newer. */
#if (__GNUC__ >= 8)
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA …
#endif
#endif
#elif defined(__INTEL_COMPILER)
/* Intel compiler with __INTEL_COMPILER >= 1800; no ATTRIB_SHA is defined on
   this branch. */
#if (__INTEL_COMPILER >= 1800)
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
/* MSVC: the minimum supported _MSC_VER (USE_VER_MIN) depends on whether
   USE_MY_MM is defined; the two threshold values are not visible here. */
#ifdef USE_MY_MM
#define USE_VER_MIN …
#else
#define USE_VER_MIN …
#endif
#if _MSC_VER >= USE_VER_MIN
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
#endif
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/* Table of 64 UInt32 values, declared here and defined in another
   translation unit. MY_ALIGN(64) is a project macro (presumably requests
   64-byte alignment - confirm). Judging by the name this is the SHA-256
   constant table; it is accessed through the K macro defined below. */
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
#define K …
#define ADD_EPI32(dest, src) …
#define SHA256_MSG1(dest, src) …
#define SHA25G_MSG2(dest, src) …
#define LOAD_SHUFFLE(m, k) … \
#define SM1(g0, g1, g2, g3) … \
#define SM2(g0, g1, g2, g3) … \
#define NNN(g0, g1, g2, g3) …
#define RND2(t0, t1) …
#define RND2_0(m, k) … \
#define RND2_1 … \
#define R4(k, g0, g1, g2, g3, OP0, OP1) … \
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) … \
#define PREPARE_STATE … \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
  Sha256_UpdateBlocks_HW - x86 / x64 build.

  Compiled only when USE_HW_SHA was defined by the compiler checks above.
  The prototype is repeated immediately before the definition (presumably so
  the definition never appears without a prior prototype - confirm).
  When ATTRIB_SHA is defined it is applied to this definition.

  Parameters (from the signature):
    state     - array of 8 UInt32 words, not const, so presumably updated
                in place.
    data      - read-only input bytes.
    numBlocks - number of blocks to process.

  NOTE(review): the function body is not visible in this view; block size and
  behaviour for numBlocks == 0 should be confirmed against the full body.
*/
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{ … }
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
/* Compiler capability check for the ARM / ARM64 hardware SHA code path.
   USE_HW_SHA is defined for clang major version >= 8, GCC major version >= 6,
   or MSVC with _MSC_VER >= 1910. With any other compiler (or an older
   version) USE_HW_SHA stays undefined and the software stub at the end of
   the file is compiled instead. */
#if defined(__clang__)
#if (__clang_major__ >= 8)
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6)
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
/* clang / GCC: define ATTRIB_SHA, with one expansion for ARM64 and another
   for 32-bit ARM. The expansions are not visible here (presumably function
   target attributes enabling the SHA instructions - confirm).
   Other compilers (the MSVC case from the check above): no attribute is
   defined; instead _ARM_USE_NEW_NEON_INTRINSICS is defined before the NEON
   header is included below. */
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA …
#else
#define ATTRIB_SHA …
#endif
#else
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
/* v128: shorthand for the NEON vector type holding four 32-bit unsigned
   lanes. */
typedef uint32x4_t v128;
/* MY_rev32_for_LE has one expansion when MY_CPU_BE (big-endian target) is
   defined and another otherwise. The expansions are not visible here;
   judging by the name it byte-reverses each 32-bit word on little-endian
   targets only - confirm. */
#ifdef MY_CPU_BE
#define MY_rev32_for_LE …
#else
#define MY_rev32_for_LE …
#endif
#define LOAD_128 …
#define STORE_128 …
#define LOAD_SHUFFLE … \
extern
/* Table of 64 UInt32 values, declared here and defined in another
   translation unit. MY_ALIGN(64) is a project macro (presumably requests
   64-byte alignment - confirm). Judging by the name this is the SHA-256
   constant table; it is accessed through the K macro defined below. */
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
#define K …
#define SHA256_SU0 …
#define SHA25G_SU1 …
#define SM1 …
#define SM2 …
#define NNN …
#define R4 … \
#define R16 … \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
  Sha256_UpdateBlocks_HW - ARM / ARM64 NEON build.

  Updates the 8-word state in place from numBlocks consecutive input blocks.
  The input pointer advances by 64 bytes per block.

  Parameters:
    state     - 8 UInt32 words; read at entry, written back at exit.
    data      - input bytes; numBlocks * 64 bytes are consumed.
                (data is only referenced through the LOAD_SHUFFLE macro,
                whose expansion is not visible here.)
    numBlocks - number of 64-byte blocks; 0 returns without touching state.

  When ATTRIB_SHA is defined it is applied to this definition.

  NOTE(review): LOAD_SHUFFLE, R16, SM1, SM2 and NNN are macros defined above
  whose bodies are not visible here. They presumably refer to the locals
  below (state0, state1, msg, tmp, m0..m3, data) by name, so do not rename or
  reorder these locals without checking the macro definitions.
*/
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
/* Running state as two 4-lane vectors: state[0..3] and state[4..7]. */
v128 state0, state1;
/* Zero blocks: nothing to do. This guard is also required because the
   do/while below pre-decrements numBlocks and would wrap around if entered
   with 0. */
if (numBlocks == 0)
return;
state0 = LOAD_128(&state[0]);
state1 = LOAD_128(&state[4]);
do
{
/* Copy of the state at the start of this block; added back after the
   rounds. */
v128 state0_save, state1_save;
/* Four message vectors filled by LOAD_SHUFFLE below. */
v128 m0, m1, m2, m3;
/* Not referenced directly in this function; presumably scratch variables
   used inside the R16 expansion - confirm against the macro. */
v128 msg, tmp;
state0_save = state0;
state1_save = state1;
/* Fill m0..m3; the second argument is an index 0..3 (presumably selecting
   successive 16-byte pieces of the current block). */
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
/* Four R16 groups, indexed 0..3. The SM1 / SM2 / NNN arguments choose an
   operation per slot; the first and last groups pass NNN in some slots
   (presumably a no-op where no message-schedule step is needed). */
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
/* Lane-wise 32-bit add of the saved state into the new state. */
state0 = vaddq_u32(state0, state0_save);
state1 = vaddq_u32(state1, state1_save);
/* Advance to the next 64-byte block. */
data += 64;
}
while (--numBlocks);
/* Write the final state back to the caller's array. */
STORE_128(&state[0], state0);
STORE_128(&state[4], state1);
}
#endif
#endif
#ifndef USE_HW_SHA
/* Software routine with the same signature; declared here and defined
   elsewhere. */
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
/* Build-time note that this fallback stub, not a hardware path, was
   compiled. */
#pragma message("Sha256 HW-SW stub was used")
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
  Sha256_UpdateBlocks_HW - fallback stub.

  Compiled only when USE_HW_SHA is not defined, i.e. none of the compiler
  checks above enabled a hardware implementation. It keeps the
  Sha256_UpdateBlocks_HW symbol available by forwarding all three arguments
  unchanged to Sha256_UpdateBlocks.
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
Sha256_UpdateBlocks(state, data, numBlocks);
}
#endif