#include "rar.hpp"
#ifdef USE_SSE
#include <wmmintrin.h>
#endif
static byte S[256]= …;
static byte S5[256];
static byte rcon[]= …;
static byte T1[256][4],T2[256][4],T3[256][4],T4[256][4];
static byte T5[256][4],T6[256][4],T7[256][4],T8[256][4];
static byte U1[256][4],U2[256][4],U3[256][4],U4[256][4];
inline void Xor128(void *dest,const void *arg1,const void *arg2)
{ … }
inline void Xor128(byte *dest,const byte *arg1,const byte *arg2,
const byte *arg3,const byte *arg4)
{ … }
inline void Copy128(byte *dest,const byte *src)
{ … }
Rijndael::Rijndael()
{ … }
void Rijndael::Init(bool Encrypt,const byte *key,uint keyLen,const byte * initVector)
{ … }
void Rijndael::blockEncrypt(const byte *input,size_t inputLen,byte *outBuffer)
{ … }
#ifdef USE_SSE
// Encrypts numBlocks consecutive 16-byte blocks from input to outBuffer
// with the x86 AES intrinsics (_mm_aesenc_si128 / _mm_aesenclast_si128).
// If CBCMode is set, every source block is XORed with the previous result
// (m_initVector for the first block) before the rounds; otherwise each
// block is processed on its own. The last computed block is stored back
// to m_initVector, so a following call continues the chain.
// All loads and stores are unaligned, so buffers need no special alignment.
void Rijndael::blockEncryptSSE(const byte *input,size_t numBlocks,byte *outBuffer)
{
  __m128i *roundKeys=(__m128i*)m_expandedKey;
  __m128i *in=(__m128i*)input;
  __m128i *out=(__m128i*)outBuffer;

  // Working register: holds the IV on entry and the latest output afterwards.
  __m128i state=_mm_loadu_si128((__m128i*)m_initVector);

  for (size_t remaining=numBlocks; remaining>0; remaining--)
  {
    __m128i plain=_mm_loadu_si128(in++);
    state=CBCMode ? _mm_xor_si128(state,plain) : plain;

    // XOR with round key 0 before the AES rounds.
    state=_mm_xor_si128(state,_mm_loadu_si128(roundKeys));

    // Full rounds use keys 1 .. m_uRounds-1.
    for (int round=1; round<m_uRounds; round++)
      state=_mm_aesenc_si128(state,_mm_loadu_si128(roundKeys+round));

    // Final round uses key m_uRounds.
    state=_mm_aesenclast_si128(state,_mm_loadu_si128(roundKeys+m_uRounds));

    _mm_storeu_si128(out++,state);
  }

  // With numBlocks==0 this writes the unchanged IV back.
  _mm_storeu_si128((__m128i*)m_initVector,state);
}
#endif
#ifdef USE_NEON_AES
// Encrypts numBlocks consecutive 16-byte blocks from input to outBuffer
// with the ARM NEON AES intrinsics (vaeseq_u8 / vaesmcq_u8).
// If CBCMode is set, every source block is XORed with the previous output
// block (m_initVector for the first block) before the rounds. On exit
// m_initVector receives the last output block, or is rewritten with its
// own contents if numBlocks is 0.
void Rijndael::blockEncryptNeon(const byte *input,size_t numBlocks,byte *outBuffer)
{
  // Block to chain with: the IV first, then the most recently written output.
  byte *chain=m_initVector;

  for (; numBlocks>0; numBlocks--)
  {
    uint8x16_t state=vld1q_u8(input);
    if (CBCMode)
      state=veorq_u8(vld1q_u8(chain),state);

    // Each vaeseq_u8 XORs in the given round key before its byte
    // transformations, so keys 0 .. m_uRounds-2 are consumed here.
    for (uint round=0; round<m_uRounds-1; round++)
      state=vaesmcq_u8(vaeseq_u8(state,vld1q_u8((byte *)m_expandedKey[round])));

    // Last round has no vaesmcq_u8 step; the final key is applied by plain XOR.
    state=vaeseq_u8(state,vld1q_u8((byte *)(m_expandedKey[m_uRounds-1])));
    state=veorq_u8(state,vld1q_u8((byte *)(m_expandedKey[m_uRounds])));

    vst1q_u8(outBuffer,state);

    // Remember where this output block lives before advancing the pointers.
    chain=outBuffer;
    outBuffer+=16;
    input+=16;
  }

  vst1q_u8(m_initVector,vld1q_u8(chain));
}
#endif
void Rijndael::blockDecrypt(const byte *input, size_t inputLen, byte *outBuffer)
{ … }
#ifdef USE_SSE
void Rijndael::blockDecryptSSE(const byte *input, size_t numBlocks, byte *outBuffer)
{
__m128i initVector = _mm_loadu_si128((__m128i*)m_initVector);
__m128i *src=(__m128i*)input;
__m128i *dest=(__m128i*)outBuffer;
__m128i *rkey=(__m128i*)m_expandedKey;
while (numBlocks > 0)
{
__m128i rl = _mm_loadu_si128(rkey + m_uRounds);
__m128i d = _mm_loadu_si128(src++);
__m128i v = _mm_xor_si128(rl, d);
for (int i=m_uRounds-1; i>0; i--)
{
__m128i ri = _mm_loadu_si128(rkey + i);
v = _mm_aesdec_si128(v, ri);
}
__m128i r0 = _mm_loadu_si128(rkey);
v = _mm_aesdeclast_si128(v, r0);
if (CBCMode)
v = _mm_xor_si128(v, initVector);
initVector = d;
_mm_storeu_si128(dest++,v);
numBlocks--;
}
_mm_storeu_si128((__m128i*)m_initVector,initVector);
}
#endif
#ifdef USE_NEON_AES
// Decrypts numBlocks consecutive 16-byte blocks from input to outBuffer
// with the ARM NEON AES intrinsics (vaesdq_u8 / vaesimcq_u8).
// Round keys are consumed from index m_uRounds down to 0. If CBCMode is
// set, each decrypted block is XORed with the preceding source block
// (m_initVector for the first one). The last source block is stored to
// m_initVector in either mode.
void Rijndael::blockDecryptNeon(const byte *input, size_t numBlocks, byte *outBuffer)
{
  // Chaining value is held in a register for the whole call.
  uint8x16_t chain=vld1q_u8(m_initVector);

  for (; numBlocks>0; numBlocks--)
  {
    // Read the source block once; it serves both as the cipher input and
    // as the chaining value for the next block.
    const uint8x16_t cipher=vld1q_u8(input);
    uint8x16_t state=cipher;

    // Rounds with the inverse mix-columns step: keys m_uRounds .. 2.
    for (int keyIdx=m_uRounds; keyIdx>1; keyIdx--)
      state=vaesimcq_u8(vaesdq_u8(state,vld1q_u8((byte *)m_expandedKey[keyIdx])));

    // Last round has no vaesimcq_u8 step; key 0 is applied by plain XOR.
    state=vaesdq_u8(state,vld1q_u8((byte *)m_expandedKey[1]));
    state=veorq_u8(state,vld1q_u8((byte *)m_expandedKey[0]));

    if (CBCMode)
      state=veorq_u8(state,chain);

    chain=cipher;
    vst1q_u8(outBuffer,state);

    input+=16;
    outBuffer+=16;
  }

  vst1q_u8(m_initVector,chain);
}
#endif
void Rijndael::keySched(byte key[_MAX_KEY_COLUMNS][4])
{ … }
void Rijndael::keyEncToDec()
{ … }
static byte gmul(byte a, byte b)
{ … }
void Rijndael::GenerateTables()
{ … }
#if 0
// Forward declaration, so the constructor below can call the self-test
// before its definition.
static void TestRijndael();
// Global object whose constructor runs TestRijndael() and then calls exit(0).
// When this #if block is enabled, the process therefore terminates right
// after the self-test, during static initialization.
struct TestRij {TestRij() {TestRijndael();exit(0);}} GlobalTestRij;
void TestRijndael()
{
byte IV[16]={0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f};
byte PT[64]={
0x6b,0xc1,0xbe,0xe2,0x2e,0x40,0x9f,0x96,0xe9,0x3d,0x7e,0x11,0x73,0x93,0x17,0x2a,
0xae,0x2d,0x8a,0x57,0x1e,0x03,0xac,0x9c,0x9e,0xb7,0x6f,0xac,0x45,0xaf,0x8e,0x51,
0x30,0xc8,0x1c,0x46,0xa3,0x5c,0xe4,0x11,0xe5,0xfb,0xc1,0x19,0x1a,0x0a,0x52,0xef,
0xf6,0x9f,0x24,0x45,0xdf,0x4f,0x9b,0x17,0xad,0x2b,0x41,0x7b,0xe6,0x6c,0x37,0x10,
};
byte Key128[16]={0x2b,0x7e,0x15,0x16,0x28,0xae,0xd2,0xa6,0xab,0xf7,0x15,0x88,0x09,0xcf,0x4f,0x3c};
byte Chk128[16]={0x3f,0xf1,0xca,0xa1,0x68,0x1f,0xac,0x09,0x12,0x0e,0xca,0x30,0x75,0x86,0xe1,0xa7};
byte Key192[24]={0x8e,0x73,0xb0,0xf7,0xda,0x0e,0x64,0x52,0xc8,0x10,0xf3,0x2b,0x80,0x90,0x79,0xe5,0x62,0xf8,0xea,0xd2,0x52,0x2c,0x6b,0x7b};
byte Chk192[16]={0x08,0xb0,0xe2,0x79,0x88,0x59,0x88,0x81,0xd9,0x20,0xa9,0xe6,0x4f,0x56,0x15,0xcd};
byte Key256[32]={0x60,0x3d,0xeb,0x10,0x15,0xca,0x71,0xbe,0x2b,0x73,0xae,0xf0,0x85,0x7d,0x77,0x81,0x1f,0x35,0x2c,0x07,0x3b,0x61,0x08,0xd7,0x2d,0x98,0x10,0xa3,0x09,0x14,0xdf,0xf4};
byte Chk256[16]={0xb2,0xeb,0x05,0xe2,0xc3,0x9b,0xe9,0xfc,0xda,0x6c,0x19,0x07,0x8c,0x6a,0x9d,0x1b};
byte *Key[3]={Key128,Key192,Key256};
byte *Chk[3]={Chk128,Chk192,Chk256};
Rijndael rij;
for (uint L=0;L<3;L++)
{
byte Out[16];
std::wstring Str;
uint KeyLength=128+L*64;
rij.Init(true,Key[L],KeyLength,IV);
for (uint I=0;I<sizeof(PT);I+=16)
rij.blockEncrypt(PT+I,16,Out);
BinToHex(Chk[L],16,Str);
mprintf(L"\nAES-%d expected: %s",KeyLength,Str.c_str());
BinToHex(Out,sizeof(Out),Str);
mprintf(L"\nAES-%d result: %s",KeyLength,Str.c_str());
if (memcmp(Out,Chk[L],16)==0)
mprintf(L" OK");
else
{
mprintf(L" FAILED");
getchar();
}
}
}
#endif