#pragma once
#include <folly/Portability.h>
#include <folly/algorithm/simd/Movemask.h>
#include <folly/algorithm/simd/detail/SimdForEach.h>
#include <folly/lang/Bits.h>
#include <array>
#if FOLLY_X64
#include <immintrin.h>
#endif
#if FOLLY_AARCH64
#include <arm_neon.h>
#endif
namespace folly {
namespace simd_detail {
#if FOLLY_X64 || FOLLY_AARCH64
// Layer shared by all SIMD char platforms: derives (publicly) from the
// platform-specific primitives struct `Platform`, so those primitives stay
// reachable through the resulting type. Only compiled when FOLLY_X64 or
// FOLLY_AARCH64 is set.
template <typename Platform>
struct SimdCharPlatformCommon : Platform { … };
#endif
#if FOLLY_X64
// Primitive byte-wise operations on one SSE2 register (__m128i) of chars.
// Comparison results (logical_t) are carried in the same register type.
struct SimdCharSse2PlatformSpecific {
  using reg_t = __m128i;
  using logical_t = reg_t;

  // Number of char lanes processed per register.
  static constexpr int kCardinal = 16;

  // Unaligned load of one register's worth of chars starting at `p`.
  FOLLY_ALWAYS_INLINE
  static reg_t loadu(const char* p, simd_detail::ignore_none) {
    const reg_t* const src = reinterpret_cast<const reg_t*>(p);
    return _mm_loadu_si128(src);
  }

  // Same load as loadu, but with sanitizer instrumentation disabled for this
  // function. NOTE(review): presumably for callers that knowingly read bytes
  // outside the logical range - confirm against call sites.
  FOLLY_DISABLE_SANITIZERS
  FOLLY_ALWAYS_INLINE
  static reg_t unsafeLoadu(const char* p, simd_detail::ignore_none) {
    const reg_t* const src = reinterpret_cast<const reg_t*>(p);
    return _mm_loadu_si128(src);
  }

  // Per-lane test `reg[i] == x`; `x` is broadcast to every lane first.
  FOLLY_ALWAYS_INLINE
  static logical_t equal(reg_t reg, char x) {
    const reg_t needle = _mm_set1_epi8(x);
    return _mm_cmpeq_epi8(reg, needle);
  }

  // Per-lane unsigned test `reg[i] <= x`, expressed as
  // `min_unsigned(reg[i], x) == reg[i]` (unsigned min followed by equality).
  FOLLY_ALWAYS_INLINE
  static logical_t le_unsigned(reg_t reg, char x) {
    const reg_t bound = _mm_set1_epi8(x);
    const reg_t clamped = _mm_min_epu8(reg, bound);
    return _mm_cmpeq_epi8(reg, clamped);
  }

  // Bitwise OR of two comparison results.
  FOLLY_ALWAYS_INLINE
  static logical_t logical_or(logical_t x, logical_t y) {
    return _mm_or_si128(x, y);
  }

  // True when the `.first` member of folly::movemask's result converts to
  // true. NOTE(review): presumably `.first` is the packed lane bitmask, i.e.
  // this reports whether any lane of `log` is set - confirm in Movemask.h.
  FOLLY_ALWAYS_INLINE
  static bool any(logical_t log, simd_detail::ignore_none) {
    auto mask = folly::movemask<std::uint8_t>(log);
    return mask.first;
  }
};
#define FOLLY_DETAIL_HAS_SIMD_CHAR_PLATFORM …
// SSE2 platform: the SSE2-specific primitives wrapped in
// SimdCharPlatformCommon.
using SimdCharSse2Platform =
SimdCharPlatformCommon<SimdCharSse2PlatformSpecific>;
#if defined(__AVX2__)
// Primitive byte-wise operations on one AVX2 register (__m256i) of chars.
// Comparison results (logical_t) are carried in the same register type.
struct SimdCharAvx2PlatformSpecific {
  using reg_t = __m256i;
  using logical_t = reg_t;

  // Number of char lanes processed per register.
  static constexpr int kCardinal = 32;

  // Unaligned load of one register's worth of chars starting at `p`.
  FOLLY_ALWAYS_INLINE
  static reg_t loadu(const char* p, simd_detail::ignore_none) {
    const reg_t* const src = reinterpret_cast<const reg_t*>(p);
    return _mm256_loadu_si256(src);
  }

  // Same load as loadu, but with sanitizer instrumentation disabled for this
  // function. NOTE(review): presumably for callers that knowingly read bytes
  // outside the logical range - confirm against call sites.
  FOLLY_DISABLE_SANITIZERS
  FOLLY_ALWAYS_INLINE
  static reg_t unsafeLoadu(const char* p, simd_detail::ignore_none) {
    const reg_t* const src = reinterpret_cast<const reg_t*>(p);
    return _mm256_loadu_si256(src);
  }

  // Per-lane test `reg[i] == x`; `x` is broadcast to every lane first.
  FOLLY_ALWAYS_INLINE
  static logical_t equal(reg_t reg, char x) {
    const reg_t needle = _mm256_set1_epi8(x);
    return _mm256_cmpeq_epi8(reg, needle);
  }

  // Per-lane unsigned test `reg[i] <= x`, expressed as
  // `min_unsigned(reg[i], x) == reg[i]` (unsigned min followed by equality).
  FOLLY_ALWAYS_INLINE
  static logical_t le_unsigned(reg_t reg, char x) {
    const reg_t bound = _mm256_set1_epi8(x);
    const reg_t clamped = _mm256_min_epu8(reg, bound);
    return _mm256_cmpeq_epi8(reg, clamped);
  }

  // Bitwise OR of two comparison results.
  FOLLY_ALWAYS_INLINE
  static logical_t logical_or(logical_t x, logical_t y) {
    return _mm256_or_si256(x, y);
  }

  // True when the `.first` member of folly::movemask's result converts to
  // true. NOTE(review): presumably `.first` is the packed lane bitmask, i.e.
  // this reports whether any lane of `log` is set - confirm in Movemask.h.
  FOLLY_ALWAYS_INLINE
  static bool any(logical_t log, simd_detail::ignore_none) {
    auto mask = folly::movemask<std::uint8_t>(log);
    return mask.first;
  }
};
// AVX2 platform: the AVX2-specific primitives wrapped in
// SimdCharPlatformCommon.
using SimdCharAvx2Platform =
SimdCharPlatformCommon<SimdCharAvx2PlatformSpecific>;
// This branch is only compiled when __AVX2__ is defined, in which case AVX2
// is the default platform.
using SimdCharPlatform = SimdCharAvx2Platform;
#else
// x86-64 build without __AVX2__: SSE2 is the default platform.
using SimdCharPlatform = SimdCharSse2Platform;
#endif
#elif FOLLY_AARCH64
// Platform-specific primitives for AArch64 builds (this branch is selected
// when FOLLY_AARCH64 is set and FOLLY_X64 is not).
struct SimdCharAarch64PlatformSpecific { … };
#define FOLLY_DETAIL_HAS_SIMD_CHAR_PLATFORM …
SimdCharAarch64Platform;
SimdCharPlatform;
#define FOLLY_DETAIL_HAS_SIMD_CHAR_PLATFORM …
#else
#define FOLLY_DETAIL_HAS_SIMD_CHAR_PLATFORM …
// Neither FOLLY_X64 nor FOLLY_AARCH64: there is no SIMD char platform, so the
// alias is defined as void.
using SimdCharPlatform = void;
#endif
}
}