#include "hwy/targets.h"
#include <stdint.h>
#include <stdio.h>
#include "hwy/base.h"
#include "hwy/detect_targets.h"
#include "hwy/highway.h"
#include "hwy/per_target.h"
#if HWY_ARCH_X86
#include <xmmintrin.h>
#if HWY_COMPILER_MSVC
#include <intrin.h>
#else
#include <cpuid.h>
#endif
#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
HWY_OS_LINUX
#ifndef TOOLCHAIN_MISS_ASM_HWCAP_H
#include <asm/hwcap.h>
#endif
#if HWY_HAVE_AUXV
#include <sys/auxv.h>
#endif
#endif
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/utsname.h>
#endif
namespace hwy {
namespace {
int64_t supported_targets_for_test_ = …;
int64_t supported_mask_ = …;
#ifdef __APPLE__
// Looks up the sysctl entry named `feature_name`, reading it as an int.
// Returns true only if the lookup succeeded and the value read is nonzero.
static HWY_INLINE HWY_MAYBE_UNUSED bool HasCpuFeature(
    const char* feature_name) {
  int value = 0;
  size_t value_size = sizeof(value);
  const int status =
      sysctlbyname(feature_name, &value, &value_size, nullptr, 0);
  if (status != 0) {
    // Lookup failed: treat the feature as absent.
    return false;
  }
  return value != 0;
}
// Parses the run of decimal digits starting at `ptr` into `parsed_val`.
//
// `ptr` is advanced past every digit consumed and ends up on the first
// non-digit character (or the terminating NUL). Returns true if at least one
// digit was consumed. If the accumulated value exceeds 0xFFFFFFFF, returns
// false immediately without writing `parsed_val`, leaving `ptr` on the digit
// that caused the overflow. If no digit is present, `parsed_val` is set to 0
// and false is returned.
static HWY_INLINE HWY_MAYBE_UNUSED bool ParseU32(const char*& ptr,
                                                 uint32_t& parsed_val) {
  const char* const begin = ptr;
  uint64_t accum = 0;

  while (*ptr != '\0') {
    const unsigned char uc = static_cast<unsigned char>(*ptr);
    if (uc < '0' || uc > '9') {
      break;
    }

    // accum is at most 0xFFFFFFFF here, so this cannot wrap in 64 bits.
    accum = accum * 10u + static_cast<unsigned>(uc - '0');
    if (accum > 0xFFFFFFFFu) {
      return false;
    }
    ++ptr;
  }

  parsed_val = static_cast<uint32_t>(accum);
  return ptr != begin;
}
// Returns true if the release string reported by uname() begins with
// "<major>.<minor>" and that version is at least 21.3. Returns false if
// uname() fails or the release string does not have that form.
// NOTE(review): the function name implies that release 21.3 corresponds to
// macOS 12.2 - confirm against the Darwin/macOS version mapping.
static HWY_INLINE HWY_MAYBE_UNUSED bool IsMacOs12_2OrLater() {
  utsname info;
  ZeroBytes(&info, sizeof(utsname));
  if (uname(&info) != 0) {
    return false;
  }

  const char* cursor = info.release;
  if (cursor == nullptr) {
    return false;
  }

  uint32_t major;
  uint32_t minor;
  // The major number must be followed directly by a '.' separator.
  if (!ParseU32(cursor, major) || *cursor != '.') {
    return false;
  }
  ++cursor;  // skip the '.'
  if (!ParseU32(cursor, minor)) {
    return false;
  }

  // Only when the major number is exactly 21 does the minor number matter.
  if (major != 21) {
    return major > 21;
  }
  return minor >= 3;
}
#endif
#if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
namespace x86 {
HWY_INLINE void Cpuid(const uint32_t level, const uint32_t count,
uint32_t* HWY_RESTRICT abcd) { … }
HWY_INLINE bool IsBitSet(const uint32_t reg, const int index) { … }
uint32_t ReadXCR0() { … }
bool IsAMD() { … }
enum class FeatureIndex : uint32_t { … };
static_assert …;
HWY_INLINE constexpr uint64_t Bit(FeatureIndex index) { … }
uint64_t FlagsFromCPUID() { … }
constexpr uint64_t kGroupSSE2 = …;
constexpr uint64_t kGroupSSSE3 = …;
constexpr uint64_t kGroupSSE4 = …;
#ifdef HWY_DISABLE_BMI2_FMA
constexpr uint64_t kGroupBMI2_FMA = 0;
#else
constexpr uint64_t kGroupBMI2_FMA = …;
#endif
#ifdef HWY_DISABLE_F16C
constexpr uint64_t kGroupF16C = 0;
#else
constexpr uint64_t kGroupF16C = …;
#endif
constexpr uint64_t kGroupAVX2 = …;
constexpr uint64_t kGroupAVX3 = …;
constexpr uint64_t kGroupAVX3_DL = …;
constexpr uint64_t kGroupAVX3_ZEN4 = …;
constexpr uint64_t kGroupAVX3_SPR = …;
int64_t DetectTargets() { … }
}
#elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
namespace arm {
// Returns the bitfield of HWY_* targets supported on this Arm CPU, derived
// from the AT_HWCAP / AT_HWCAP2 words returned by getauxval().
//
// On AArch64, HWY_NEON_WITHOUT_AES is always reported; HWY_NEON additionally
// requires HWCAP_AES, and HWY_NEON_BF16 additionally requires half-precision,
// dot-product and BF16 support. HWY_SVE requires HWCAP_SVE; HWY_SVE2 requires
// both HWCAP2_SVE2 and HWCAP2_SVEAES. On 32-bit Arm, HWY_NEON_WITHOUT_AES is
// reported only if both HWCAP_NEON and HWCAP_VFPv4 are set.
int64_t DetectTargets() {
  int64_t bits = 0;

  using CapBits = unsigned long;
  const CapBits hw = getauxval(AT_HWCAP);
  (void)hw;  // may be unused if none of the HWCAP_* macros below are defined

#if HWY_ARCH_ARM_A64
  // Read once; used for both the BF16 and the SVE2 checks below.
  const CapBits hw2 = getauxval(AT_HWCAP2);

  bits |= HWY_NEON_WITHOUT_AES;

#if defined(HWCAP_AES)
  if (hw & HWCAP_AES) {
    bits |= HWY_NEON;

    // Linux reports BF16 via HWCAP2_BF16 in AT_HWCAP2, whereas half-precision
    // (ASIMDHP) and dot-product (ASIMDDP) are reported in AT_HWCAP. There is
    // no HWCAP_ASIMDBF16 macro, so guarding on it would permanently disable
    // HWY_NEON_BF16.
#if defined(HWCAP_ASIMDHP) && defined(HWCAP_ASIMDDP) && defined(HWCAP2_BF16)
    const CapBits kGroupF16Dot = HWCAP_ASIMDHP | HWCAP_ASIMDDP;
    if ((hw & kGroupF16Dot) == kGroupF16Dot && (hw2 & HWCAP2_BF16)) {
      bits |= HWY_NEON_BF16;
    }
#endif
  }
#endif

#if defined(HWCAP_SVE)
  if (hw & HWCAP_SVE) {
    bits |= HWY_SVE;
  }
#endif

  // Fallback definitions for toolchain headers that lack these macros.
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 …
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES …
#endif
  if ((hw2 & HWCAP2_SVE2) && (hw2 & HWCAP2_SVEAES)) {
    bits |= HWY_SVE2;
  }
#else
#if defined(HWCAP_NEON) && defined(HWCAP_VFPv4)
  if ((hw & HWCAP_NEON) && (hw & HWCAP_VFPv4)) {
    bits |= HWY_NEON_WITHOUT_AES;
  }
#endif
#endif
  return bits;
}
}
#elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
namespace ppc {
#ifndef PPC_FEATURE_HAS_ALTIVEC
#define PPC_FEATURE_HAS_ALTIVEC …
#endif
#ifndef PPC_FEATURE_HAS_VSX
#define PPC_FEATURE_HAS_VSX …
#endif
#ifndef PPC_FEATURE2_ARCH_2_07
#define PPC_FEATURE2_ARCH_2_07 …
#endif
#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO …
#endif
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 …
#endif
#ifndef PPC_FEATURE2_ARCH_3_1
#define PPC_FEATURE2_ARCH_3_1 …
#endif
// Integer type used to hold the capability words obtained from getauxval().
using CapBits = unsigned long;
// Bits that must all be set in AT_HWCAP before any PPC target is reported.
constexpr CapBits kGroupVSX = PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX;
// Bits that must all be set in AT_HWCAP2 for HWY_PPC8. Defining
// HWY_DISABLE_PPC8_CRYPTO drops the PPC_FEATURE2_VEC_CRYPTO requirement.
#if defined(HWY_DISABLE_PPC8_CRYPTO)
constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07;
#else
constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_VEC_CRYPTO;
#endif
// Each later group is a superset of the previous one, so HWY_PPC9 implies the
// HWY_PPC8 bits and HWY_PPC10 implies the HWY_PPC9 bits.
constexpr CapBits kGroupPPC9 = kGroupPPC8 | PPC_FEATURE2_ARCH_3_00;
constexpr CapBits kGroupPPC10 = kGroupPPC9 | PPC_FEATURE2_ARCH_3_1;
int64_t DetectTargets() {
int64_t bits = 0;
#if defined(AT_HWCAP) && defined(AT_HWCAP2)
const CapBits hw = getauxval(AT_HWCAP);
if ((hw & kGroupVSX) == kGroupVSX) {
const CapBits hw2 = getauxval(AT_HWCAP2);
if ((hw2 & kGroupPPC8) == kGroupPPC8) {
bits |= HWY_PPC8;
}
if ((hw2 & kGroupPPC9) == kGroupPPC9) {
bits |= HWY_PPC9;
}
if ((hw2 & kGroupPPC10) == kGroupPPC10) {
bits |= HWY_PPC10;
}
}
#endif
return bits;
}
}
#elif HWY_ARCH_S390X && HWY_HAVE_RUNTIME_DISPATCH
namespace s390x {
#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX …
#endif
#ifndef HWCAP_S390_VXE
#define HWCAP_S390_VXE …
#endif
#ifndef HWCAP_S390_VXRS_EXT2
#define HWCAP_S390_VXRS_EXT2 …
#endif
// Integer type used to hold the capability word obtained from getauxval().
using CapBits = unsigned long;
// Bits that must all be set in AT_HWCAP for HWY_Z14.
constexpr CapBits kGroupZ14 = HWCAP_S390_VX | HWCAP_S390_VXE;
// Bits that must all be set in AT_HWCAP for HWY_Z15: the Z14 bits plus
// HWCAP_S390_VXRS_EXT2.
constexpr CapBits kGroupZ15 =
HWCAP_S390_VX | HWCAP_S390_VXE | HWCAP_S390_VXRS_EXT2;
int64_t DetectTargets() {
int64_t bits = 0;
#if defined(AT_HWCAP)
const CapBits hw = getauxval(AT_HWCAP);
if ((hw & kGroupZ14) == kGroupZ14) {
bits |= HWY_Z14;
}
if ((hw & kGroupZ15) == kGroupZ15) {
bits |= HWY_Z15;
}
#endif
return bits;
}
}
#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
namespace rvv {
// Fallback definition of COMPAT_HWCAP_ISA_V for toolchain headers that do not
// provide it. The guard tests the very macro being defined (rather than the
// unrelated name HWCAP_RVV) so that a definition supplied by the system
// headers is never redefined, and the macro is always available to
// DetectTargets() below.
#ifndef COMPAT_HWCAP_ISA_V
#define COMPAT_HWCAP_ISA_V …
#endif
using CapBits = unsigned long;
// Returns HWY_RVV if the vector extension is usable on this CPU, otherwise 0.
//
// AT_HWCAP must contain COMPAT_HWCAP_ISA_V. If so, the inline asm enables the
// V extension for the assembler, executes `vsetvli` with e8/m1 and a `zero`
// source operand, then reads the `vtype` CSR. HWY_RVV is reported only if the
// vtype value is non-negative and the length returned by vsetvli is >= 16.
int64_t DetectTargets() {
  const CapBits hw = getauxval(AT_HWCAP);
  if ((hw & COMPAT_HWCAP_ISA_V) != COMPAT_HWCAP_ISA_V) {
    return 0;
  }

  // Signed integer of the same width as a general-purpose register, so that
  // the sign of the vtype value can be tested below.
#if HWY_ARCH_RISCV_64
  using VtypeValue = int64_t;
#else
  using VtypeValue = int32_t;
#endif

  size_t vlmax_e8m1;
  VtypeValue vtype;
  asm volatile(
      ".option push\n\t"
      ".option arch, +v\n\t"
      "vsetvli %0, zero, e8, m1, ta, ma\n\t"
      "csrr %1, vtype\n\t"
      ".option pop"
      : "=r"(vlmax_e8m1), "=r"(vtype));

  // NOTE(review): a negative vtype presumably means the most-significant
  // "vill" (illegal configuration) bit is set, and a length of at least 16
  // presumably means vectors of at least 128 bits - confirm against the
  // RISC-V vector specification.
  const bool usable = (vtype >= 0) && (vlmax_e8m1 >= 16);
  return usable ? HWY_RVV : 0;
}
}
#endif
int64_t DetectTargets() { … }
}
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) { … }
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) { … }
HWY_DLLEXPORT int64_t SupportedTargets() { … }
HWY_DLLEXPORT ChosenTarget& GetChosenTarget() { … }
}