#include <openssl/base.h>
#include "../../internal.h"
#include "internal.h"
#if !defined(BORINGSSL_HAS_UINT128) && defined(OPENSSL_SSE2)
#include <emmintrin.h>
#endif
#if defined(BORINGSSL_HAS_UINT128)
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) { … }
#elif defined(OPENSSL_SSE2)
// gcm_mul32_nohw multiplies |a| and |b| as polynomials over GF(2) (carry-less
// multiplication) using ordinary SSE2 integer multiplies. The 64-bit product
// is returned in the low 64 bits of the result; the high 64 bits only hold a
// partial intermediate value and must be ignored by the caller.
//
// Each operand is split into four "bit classes" by the masks 0x11111111,
// 0x22222222, 0x44444444 and 0x88888888, so that set bits within one class are
// four positions apart. When two such values are multiplied, at most
// 32/4 = 8 terms are added at any one position, which fits in four bits and
// therefore never carries into the next position of the same class.
static __m128i gcm_mul32_nohw(uint32_t a, uint32_t b) {
// Put each operand in the low 32 bits of both 64-bit lanes. _mm_mul_epu32
// multiplies exactly those low halves and yields one 64-bit product per lane.
__m128i aa = _mm_setr_epi32(a, 0, a, 0);
__m128i bb = _mm_setr_epi32(b, 0, b, 0);
// Naming: aNaN holds class N of |a| in both lanes; bMbN holds class M of |b|
// in the low lane and class N of |b| in the high lane.
__m128i a0a0 =
_mm_and_si128(aa, _mm_setr_epi32(0x11111111, 0, 0x11111111, 0));
__m128i a2a2 =
_mm_and_si128(aa, _mm_setr_epi32(0x44444444, 0, 0x44444444, 0));
__m128i b0b1 =
_mm_and_si128(bb, _mm_setr_epi32(0x11111111, 0, 0x22222222, 0));
__m128i b2b3 =
_mm_and_si128(bb, _mm_setr_epi32(0x44444444, 0, 0x88888888, 0));
// cMcN accumulates, in the low lane, the products whose class indices sum to
// M mod 4 and, in the high lane, those summing to N mod 4.
// Here: c0c1 = {a0*b0 ^ a2*b2, a0*b1 ^ a2*b3}.
__m128i c0c1 =
_mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));
// c2c3 = {a2*b0 ^ a0*b2, a2*b1 ^ a0*b3}.
__m128i c2c3 =
_mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));
// Second half of the operand classes: odd classes of |a| and the remaining
// pairings of |b|.
__m128i a1a1 =
_mm_and_si128(aa, _mm_setr_epi32(0x22222222, 0, 0x22222222, 0));
__m128i a3a3 =
_mm_and_si128(aa, _mm_setr_epi32(0x88888888, 0, 0x88888888, 0));
__m128i b3b0 =
_mm_and_si128(bb, _mm_setr_epi32(0x88888888, 0, 0x11111111, 0));
__m128i b1b2 =
_mm_and_si128(bb, _mm_setr_epi32(0x22222222, 0, 0x44444444, 0));
// c0c1 ^= {a1*b3, a1*b0} ^ {a3*b1, a3*b2}.
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));
// c2c3 ^= {a3*b3, a3*b0} ^ {a1*b1, a1*b2}.
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));
// Keep only the bits belonging to each accumulator's own class; the other
// bits hold the integer carries and are discarded.
c0c1 = _mm_and_si128(
c0c1, _mm_setr_epi32(0x11111111, 0x11111111, 0x22222222, 0x22222222));
c2c3 = _mm_and_si128(
c2c3, _mm_setr_epi32(0x44444444, 0x44444444, 0x88888888, 0x88888888));
// The masked values occupy disjoint bits, so XOR merges them: the low lane
// becomes c0 | c2 and the high lane c1 | c3.
c0c1 = _mm_xor_si128(c0c1, c2c3);
// Fold the high lane into the low lane (shift right by 8 bytes), leaving the
// complete product c0 ^ c1 ^ c2 ^ c3 in the low 64 bits.
c0c1 = _mm_xor_si128(c0c1, _mm_srli_si128(c0c1, 8));
return c0c1;
}
// gcm_mul64_nohw computes the 128-bit carry-less product of |a| and |b|,
// storing the low 64 bits in |*out_lo| and the high 64 bits in |*out_hi|.
//
// It uses one level of Karatsuba: three 32x32 carry-less multiplies (low
// halves, high halves, and the XOR of the halves) instead of four.
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
                           uint64_t b) {
  const uint32_t a_low = (uint32_t)a;
  const uint32_t a_high = (uint32_t)(a >> 32);
  const uint32_t b_low = (uint32_t)b;
  const uint32_t b_high = (uint32_t)(b >> 32);

  // Only the low 64 bits of each helper result are used below.
  const __m128i low = gcm_mul32_nohw(a_low, b_low);
  const __m128i high = gcm_mul32_nohw(a_high, b_high);
  __m128i cross = gcm_mul32_nohw(a_low ^ a_high, b_low ^ b_high);

  // Karatsuba middle term: (a_low ^ a_high)(b_low ^ b_high) ^ low ^ high.
  cross = _mm_xor_si128(cross, _mm_xor_si128(low, high));

  // Move the middle term up by 32 bits (4 bytes) and keep only the two middle
  // 32-bit lanes, dropping whatever was shifted in from the unused upper half.
  const __m128i middle_lanes = _mm_setr_epi32(0, 0xffffffff, 0xffffffff, 0);
  cross = _mm_and_si128(_mm_slli_si128(cross, 4), middle_lanes);

  // Pack |low| into bits 0..63 and |high| into bits 64..127, then add the
  // middle term.
  __m128i product = _mm_xor_si128(_mm_unpacklo_epi64(low, high), cross);

  const char *bytes = (const char *)&product;
  memcpy(out_lo, bytes, 8);
  memcpy(out_hi, bytes + 8, 8);
}
#else
// gcm_mul32_nohw returns the 64-bit carry-less (GF(2)[x]) product of |a| and
// |b|, computed with ordinary integer multiplication.
//
// Both operands are split into four bit classes, one per residue of the bit
// index mod 4. Within a class the set bits are four positions apart, so a
// product of two classes adds at most 32/4 = 8 terms at any position; that
// fits in four bits and never carries into the next position of the same
// class. The carry bits land in the other classes and are masked away.
static uint64_t gcm_mul32_nohw(uint32_t a, uint32_t b) {
  static const uint32_t kClassMask[4] = {0x11111111, 0x22222222, 0x44444444,
                                         0x88888888};

  uint32_t a_part[4];
  uint32_t b_part[4];
  for (unsigned i = 0; i < 4; i++) {
    a_part[i] = a & kClassMask[i];
    b_part[i] = b & kClassMask[i];
  }

  // Loop bounds and array indices below are fixed; none depend on |a| or |b|.
  uint64_t product = 0;
  for (unsigned k = 0; k < 4; k++) {
    // Output class |k| collects every pair (i, j) with i + j == k (mod 4).
    uint64_t acc = 0;
    for (unsigned i = 0; i < 4; i++) {
      const unsigned j = (k - i) & 3;
      acc ^= a_part[i] * (uint64_t)b_part[j];
    }
    // Keep only the bits of class |k|: 0x1111..., 0x2222..., 0x4444...,
    // 0x8888... for k = 0, 1, 2, 3.
    product |= acc & (UINT64_C(0x1111111111111111) << k);
  }
  return product;
}
// gcm_mul64_nohw computes the 128-bit carry-less product of |a| and |b|,
// storing the low 64 bits in |*out_lo| and the high 64 bits in |*out_hi|.
//
// It uses one level of Karatsuba: three 32x32 carry-less multiplies (low
// halves, high halves, and the XOR of the halves) instead of four.
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
                           uint64_t b) {
  const uint32_t a_low = (uint32_t)a;
  const uint32_t a_high = (uint32_t)(a >> 32);
  const uint32_t b_low = (uint32_t)b;
  const uint32_t b_high = (uint32_t)(b >> 32);

  const uint64_t low = gcm_mul32_nohw(a_low, b_low);
  const uint64_t high = gcm_mul32_nohw(a_high, b_high);

  // Karatsuba middle term: (a_low ^ a_high)(b_low ^ b_high) ^ low ^ high.
  uint64_t cross = gcm_mul32_nohw(a_low ^ a_high, b_low ^ b_high);
  cross ^= low;
  cross ^= high;

  // The middle term is weighted by x^32, so it straddles both output words.
  *out_lo = low ^ (cross << 32);
  *out_hi = high ^ (cross >> 32);
}
#endif
void gcm_init_nohw(u128 Htable[16], const uint64_t Xi[2]) { … }
static void gcm_polyval_nohw(uint64_t Xi[2], const u128 *H) { … }
void gcm_gmult_nohw(uint8_t Xi[16], const u128 Htable[16]) { … }
void gcm_ghash_nohw(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
size_t len) { … }