chromium/third_party/lzma_sdk/C/Sha256Opt.c

/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */

#include "Precomp.h"

// Optional switch for older MSVC (1200 <= _MSC_VER < 1900): USE_MY_MM is the
// macro that enables the "My_mm.h" include further down in this file.
// The define is commented out, so this block currently defines nothing.
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif

#include "CpuArch.h"

#ifdef MY_CPU_X86_OR_AMD64
  /*
    x86 / x64: define USE_HW_SHA when the compiler passes the version check
    for its family (clang >= 8, GCC >= 8, Intel >= 1800, MSVC >= USE_VER_MIN).
    The thresholds are marked as provisional ("fix that check").

    When the compiler is not already targeting SHA (__SHA__ is undefined),
    ATTRIB_SHA is defined so it can be placed in front of the function
    definition.
    NOTE(review): ATTRIB_SHA expands to nothing here, so no target attribute
    is actually applied; the disabled pragma in the GCC branch suggests
    "sha,ssse3" was intended - confirm.
  */
  #if defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define USE_HW_SHA
      #ifndef __SHA__
        #define ATTRIB_SHA
        #if defined(_MSC_VER)
          // SSSE3: for clang-cl:
          #include <tmmintrin.h>
          #define __SHA__
        #endif
      #endif

    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 8) // fix that check
      #define USE_HW_SHA
      #ifndef __SHA__
        #define ATTRIB_SHA
        // #pragma GCC target("sha,ssse3")
      #endif
    #endif
  #elif defined(__INTEL_COMPILER)
    #if (__INTEL_COMPILER >= 1800) // fix that check
      #define USE_HW_SHA
    #endif
  #elif defined(_MSC_VER)
    // Minimum _MSC_VER accepted for the hardware path. USE_VER_MIN must expand
    // to a number: with an empty replacement list the #if below is not a valid
    // constant expression and the file does not preprocess under MSVC.
    // 1910 is the same minimum the ARM branch of this file uses for MSVC.
    // USE_MY_MM builds (currently disabled) target compilers older than 1900.
    // NOTE(review): 1300 is believed to match upstream - confirm.
    #ifdef USE_MY_MM
      #define USE_VER_MIN 1300
    #else
      #define USE_VER_MIN 1910
    #endif
    #if _MSC_VER >= USE_VER_MIN
      #define USE_HW_SHA
    #endif
  #endif
// The conditional is intentionally left open here: it is continued by the
// "#elif defined(MY_CPU_ARM_OR_ARM64)" branch later in the file.
// #endif // MY_CPU_X86_OR_AMD64

#ifdef USE_HW_SHA

// #pragma message("Sha256 HW")
// #include <wmmintrin.h>

#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>

#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif

#ifdef USE_MY_MM
#include "My_mm.h"
#endif

#endif

/*
SHA256 uses:
SSE2:
  _mm_loadu_si128
  _mm_storeu_si128
  _mm_set_epi32
  _mm_add_epi32
  _mm_shuffle_epi32 / pshufd


  
SSSE3:
  _mm_shuffle_epi8 / pshufb
  _mm_alignr_epi8
SHA:
  _mm_sha256*
*/

// The K array must be aligned to at least 16 bytes.
// The compiler can look at the alignment attribute and select
//   movdqu - for code without align attribute
//   movdqa - for code with    align attribute
// The array is declared extern here, so its definition is expected to come
// from another translation unit.
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

// Helper macros for the x86 hardware SHA-256 block function.
// NOTE(review): in this copy every macro below has an empty replacement list
// (the line-continuation backslashes are followed by blank lines), so none of
// them expands to any code - TODO confirm against the upstream LZMA SDK file.
// NOTE(review): SHA25G_MSG2 is spelled with "25G" rather than "256"; keep the
// spelling unless every user of the macro is renamed together.
#define K


#define ADD_EPI32(dest, src)
#define SHA256_MSG1(dest, src)
#define SHA25G_MSG2(dest, src)


#define LOAD_SHUFFLE(m, k) \

#define SM1(g0, g1, g2, g3) \

#define SM2(g0, g1, g2, g3) \

// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)


#define NNN(g0, g1, g2, g3)


#define RND2(t0, t1)

#define RND2_0(m, k) \


#define RND2_1 \


// We use a scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2

#define R4(k, g0, g1, g2, g3, OP0, OP1) \

#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \

#define PREPARE_STATE \

/*
  x86 / x64 hardware variant of the SHA-256 block-update entry point
  (compiled only when MY_CPU_X86_OR_AMD64 and USE_HW_SHA are defined).

  state     - array of 8 UInt32 hash-state words
  data      - input bytes
  numBlocks - number of input blocks

  The separate prototype precedes the definition so that ATTRIB_SHA, when
  defined, can be placed directly in front of the definition.

  NOTE(review): the body is empty in this copy, so as written the function
  returns without reading data or modifying state - TODO confirm against the
  upstream LZMA SDK file; the body appears to be missing.
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{}

#endif // USE_HW_SHA

#elif defined(MY_CPU_ARM_OR_ARM64)

  // ARM / ARM64: define USE_HW_SHA when the compiler passes the version check
  // for its family (clang >= 8, GCC >= 6, MSVC _MSC_VER >= 1910).
  // The clang and GCC thresholds are marked as provisional ("fix that check").
  #if defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define USE_HW_SHA
    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 6) // fix that check
      #define USE_HW_SHA
    #endif
  #elif defined(_MSC_VER)
    #if _MSC_VER >= 1910
      #define USE_HW_SHA
    #endif
  #endif

#ifdef USE_HW_SHA

// Set-up for the ARM hardware SHA-256 block function: function attribute,
// NEON header, 128-bit vector type and helper macros.

// #pragma message("=== Sha256 HW === ")

// NOTE(review): both ATTRIB_SHA branches are identical and expand to nothing,
// so the ARM64 / ARM32 distinction has no effect here - TODO confirm whether
// target attributes are missing from this copy.
#if defined(__clang__) || defined(__GNUC__)
  #ifdef MY_CPU_ARM64
    #define ATTRIB_SHA
  #else
    #define ATTRIB_SHA
  #endif
#else
  // _MSC_VER
  // for arm32
  #define _ARM_USE_NEW_NEON_INTRINSICS
#endif

// MSVC on ARM64 uses its own NEON header; every other configuration uses <arm_neon.h>.
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif

// 128-bit vector type used for the hash state and the message words.
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC

// NOTE(review): both MY_rev32_for_LE branches are identical and empty, so
// MY_CPU_BE makes no difference here - TODO confirm.
#ifdef MY_CPU_BE
  #define MY_rev32_for_LE
#else
  #define MY_rev32_for_LE
#endif

// NOTE(review): the macros below are object-like with empty replacement
// lists, yet Sha256_UpdateBlocks_HW invokes them with argument lists
// (LOAD_128(...), STORE_128(...), LOAD_SHUFFLE(...), R16(...)). Their
// parameter lists and bodies appear to be missing from this copy -
// TODO confirm against the upstream LZMA SDK file.
// NOTE(review): SHA25G_SU1 is spelled with "25G" rather than "256".
#define LOAD_128
#define STORE_128

#define LOAD_SHUFFLE \

// The K array must be aligned to at least 16 bytes.
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

#define K


#define SHA256_SU0
#define SHA25G_SU1

#define SM1
#define SM2
#define NNN


#define R4 \


#define R16 \


/*
  ARM / ARM64 hardware variant of the SHA-256 block-update entry point
  (compiled only when MY_CPU_ARM_OR_ARM64 and USE_HW_SHA are defined).

  state     - array of 8 UInt32 hash-state words; read as two 128-bit halves
              (state[0..3] and state[4..7]) and written back before returning.
  data      - input bytes; the pointer advances by 64 bytes per processed block.
  numBlocks - number of 64-byte blocks to process; when it is 0 the function
              returns immediately and state is left untouched.

  The separate prototype precedes the definition so that ATTRIB_SHA, when
  defined, can be placed directly in front of the definition.
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  v128 state0, state1;

  // The do/while loop below always runs its body at least once, so the
  // zero-block case has to be rejected up front.
  if (numBlocks == 0)
    return;

  state0 = LOAD_128(&state[0]);
  state1 = LOAD_128(&state[4]);
  
  do
  {
    v128 state0_save, state1_save;
    v128 m0, m1, m2, m3;
    // msg and tmp are not referenced directly in this function; presumably
    // they are scratch variables for the R4 / R16 macro expansions - confirm.
    v128 msg, tmp;

    // Keep the incoming state so it can be added back after the rounds.
    state0_save = state0;
    state1_save = state1;
    
    // Fill m0..m3 from the current block via LOAD_SHUFFLE (indices 0..3).
    LOAD_SHUFFLE (m0, 0)
    LOAD_SHUFFLE (m1, 1)
    LOAD_SHUFFLE (m2, 2)
    LOAD_SHUFFLE (m3, 3)

    // Four R16 groups, indexed 0..3. The NNN / SM1 / SM2 arguments choose the
    // operation applied at each step; NNN presumably means "no operation" and
    // SM1 / SM2 presumably are message-schedule steps - confirm against the
    // macro definitions.
    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
    
    // Add the state saved at the start of this block back into the new state.
    state0 = vaddq_u32(state0, state0_save);
    state1 = vaddq_u32(state1, state1_save);
    
    data += 64;
  }
  while (--numBlocks);

  STORE_128(&state[0], state0);
  STORE_128(&state[4], state1);
}

#endif // USE_HW_SHA

#endif // MY_CPU_ARM_OR_ARM64


#ifndef USE_HW_SHA

/*
  Fallback used when no hardware SHA-256 path was enabled above:
  Sha256_UpdateBlocks_HW still has to exist, so it is provided as a stub
  that simply hands its arguments to Sha256_UpdateBlocks.
*/

// Declared locally instead of including "Sha256.h"; the function is defined
// outside this file.
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);

// Make it visible in the build log that the stub was compiled.
#pragma message("Sha256 HW-SW stub was used")

/*
  Stub with the same signature as the hardware implementations.
  All three arguments are forwarded unchanged to Sha256_UpdateBlocks.
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  Sha256_UpdateBlocks(state, data, numBlocks);
}

#endif // !USE_HW_SHA