#ifndef HIGHWAY_HWY_BASE_H_
#define HIGHWAY_HWY_BASE_H_
#include <stddef.h>
#include <stdint.h>
#include "hwy/detect_compiler_arch.h"
#include "hwy/highway_export.h"
#define HWY_MAJOR …
#define HWY_MINOR …
#define HWY_PATCH …
#define HWY_VERSION_GE(major, minor) …
#define HWY_VERSION_LT(major, minor) …
#if !HWY_IDE
#if !defined(HWY_NO_LIBCXX)
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS …
#endif
#include <inttypes.h>
#endif
#if (HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)) || HWY_COMPILER_MSVC
#include <atomic>
#endif
#endif
#ifndef HWY_HAVE_CXX20_THREE_WAY_COMPARE
#if !defined(HWY_NO_LIBCXX) && defined(__cpp_impl_three_way_comparison) && \
__cpp_impl_three_way_comparison >= 201907L && HWY_HAS_INCLUDE(<compare>)
#include <compare>
#define HWY_HAVE_CXX20_THREE_WAY_COMPARE …
#else
#define HWY_HAVE_CXX20_THREE_WAY_COMPARE …
#endif
#endif
#if HWY_COMPILER_MSVC
#include <string.h>
#endif
#define HWY_STR_IMPL(macro) …
#define HWY_STR(macro) …
#if HWY_COMPILER_MSVC
#include <intrin.h>
#define HWY_FUNCTION …
#define HWY_RESTRICT …
#define HWY_INLINE …
#define HWY_NOINLINE …
#define HWY_FLATTEN
#define HWY_NORETURN …
#define HWY_LIKELY …
#define HWY_UNLIKELY …
#define HWY_PRAGMA …
#define HWY_DIAGNOSTICS …
#define HWY_DIAGNOSTICS_OFF …
#define HWY_MAYBE_UNUSED
#define HWY_HAS_ASSUME_ALIGNED …
#if (_MSC_VER >= 1700)
#define HWY_MUST_USE_RESULT …
#else
#define HWY_MUST_USE_RESULT
#endif
#else
#define HWY_FUNCTION …
#define HWY_RESTRICT …
#ifdef __OPTIMIZE__
#define HWY_INLINE …
#else
#define HWY_INLINE …
#endif
#define HWY_NOINLINE …
#define HWY_FLATTEN …
#define HWY_NORETURN …
#define HWY_LIKELY(expr) …
#define HWY_UNLIKELY(expr) …
#define HWY_PRAGMA(tokens) …
#define HWY_DIAGNOSTICS(tokens) …
#define HWY_DIAGNOSTICS_OFF(msc, gcc) …
#define HWY_MAYBE_UNUSED …
#define HWY_MUST_USE_RESULT …
#endif
namespace hwy {
#if HWY_HAS_ATTRIBUTE(__format__)
#define HWY_FORMAT(idx_fmt, idx_arg) …
#else
#define HWY_FORMAT …
#endif
#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
#define HWY_ASSUME_ALIGNED(ptr, align) …
#else
#define HWY_ASSUME_ALIGNED …
#endif
#define HWY_RCAST_ALIGNED(type, ptr) …
#if HWY_COMPILER_ICC
#define HWY_PUSH_ATTRIBUTES …
#define HWY_POP_ATTRIBUTES
#elif HWY_COMPILER_CLANG
#define HWY_PUSH_ATTRIBUTES(targets_str) …
#define HWY_POP_ATTRIBUTES …
#elif HWY_COMPILER_GCC_ACTUAL
#define HWY_PUSH_ATTRIBUTES …
#define HWY_POP_ATTRIBUTES …
#else
#define HWY_PUSH_ATTRIBUTES …
#define HWY_POP_ATTRIBUTES
#endif
#define HWY_API …
#define HWY_CONCAT_IMPL(a, b) …
#define HWY_CONCAT(a, b) …
#define HWY_MIN(a, b) …
#define HWY_MAX(a, b) …
#if HWY_COMPILER_GCC_ACTUAL
#define HWY_UNROLL …
#define HWY_DEFAULT_UNROLL …
#elif HWY_COMPILER_CLANG || HWY_COMPILER_ICC || HWY_COMPILER_ICX
#define HWY_UNROLL(factor) …
#define HWY_DEFAULT_UNROLL …
#else
#define HWY_UNROLL …
#define HWY_DEFAULT_UNROLL
#endif
#if HWY_HAS_CPP_ATTRIBUTE(assume)
#define HWY_ASSUME …
#elif HWY_COMPILER_MSVC || HWY_COMPILER_ICC
#define HWY_ASSUME …
#elif HWY_COMPILER_CLANG && HWY_HAS_BUILTIN(__builtin_assume)
#define HWY_ASSUME(expr) …
#elif HWY_COMPILER_GCC_ACTUAL >= 405
#define HWY_ASSUME …
#else
#define HWY_ASSUME …
#endif
#if HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)
#define HWY_FENCE …
#else
#define HWY_FENCE
#endif
#define HWY_REP4(literal) …
// Declaration only; no definition is visible in this header.
// Annotated HWY_DLLEXPORT (exported from the library) and HWY_NORETURN
// (expected never to return to the caller).
//   file, line: presumably the source location of the failure, as passed by
//               HWY_ABORT - that macro's expansion is not visible here, confirm.
//   format, ...: format string followed by its arguments. HWY_FORMAT(3, 4)
//               names parameter 3 as the format string and parameter 4 as the
//               first argument, on compilers that have the __format__
//               attribute.
HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
Abort(const char* file, int line, const char* format, ...);
#define HWY_ABORT(format, ...) …
#define HWY_ASSERT(condition) …
#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER) || \
defined(__SANITIZE_MEMORY__)
#define HWY_IS_MSAN …
#else
#define HWY_IS_MSAN …
#endif
#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER) || \
defined(__SANITIZE_ADDRESS__)
#define HWY_IS_ASAN …
#else
#define HWY_IS_ASAN …
#endif
#if HWY_HAS_FEATURE(hwaddress_sanitizer) || defined(HWADDRESS_SANITIZER) || \
defined(__SANITIZE_HWADDRESS__)
#define HWY_IS_HWASAN …
#else
#define HWY_IS_HWASAN …
#endif
#if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER) || \
defined(__SANITIZE_THREAD__)
#define HWY_IS_TSAN …
#else
#define HWY_IS_TSAN …
#endif
#if HWY_HAS_FEATURE(undefined_behavior_sanitizer) || \
defined(UNDEFINED_BEHAVIOR_SANITIZER)
#define HWY_IS_UBSAN …
#else
#define HWY_IS_UBSAN …
#endif
#if HWY_IS_MSAN
#define HWY_ATTR_NO_MSAN …
#else
#define HWY_ATTR_NO_MSAN
#endif
#if !defined(HWY_IS_DEBUG_BUILD)
#if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
HWY_IS_HWASAN || HWY_IS_MSAN || HWY_IS_TSAN || HWY_IS_UBSAN || \
defined(__clang_analyzer__)
#define HWY_IS_DEBUG_BUILD …
#else
#define HWY_IS_DEBUG_BUILD …
#endif
#endif
#if HWY_IS_DEBUG_BUILD
#define HWY_DASSERT(condition) …
#else
#define HWY_DASSERT …
#endif
#if HWY_COMPILER_MSVC
#pragma intrinsic(memcpy)
#pragma intrinsic(memset)
#endif
template <size_t kBytes, typename From, typename To>
HWY_API void CopyBytes(const From* HWY_RESTRICT from, To* HWY_RESTRICT to) { … }
HWY_API void CopyBytes(const void* HWY_RESTRICT from, void* HWY_RESTRICT to,
size_t num_of_bytes_to_copy) { … }
template <typename From, typename To>
HWY_API void CopySameSize(const From* HWY_RESTRICT from, To* HWY_RESTRICT to) { … }
template <size_t kBytes, typename To>
HWY_API void ZeroBytes(To* to) { … }
HWY_API void ZeroBytes(void* to, size_t num_bytes) { … }
#if HWY_ARCH_X86
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = …;
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
#else
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
#endif
#if HWY_ARCH_X86
#define HWY_ALIGN_MAX …
#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
__riscv_v_intrinsic >= 11000
#define HWY_ALIGN_MAX …
#else
#define HWY_ALIGN_MAX …
#endif
// Forward declarations of the 16-bit floating-point wrapper structs, so that
// code between here and their definitions further down in this header can
// name them.
struct float16_t;
struct bfloat16_t;
float32_t;
float64_t;
#pragma pack(push, 1)
struct alignas(16) uint128_t { … };
struct alignas(16) K64V64 { … };
struct alignas(8) K32V32 { … };
#pragma pack(pop)
static inline HWY_MAYBE_UNUSED bool operator<(const uint128_t& a,
const uint128_t& b) { … }
static inline HWY_MAYBE_UNUSED bool operator>(const uint128_t& a,
const uint128_t& b) { … }
static inline HWY_MAYBE_UNUSED bool operator==(const uint128_t& a,
const uint128_t& b) { … }
static inline HWY_MAYBE_UNUSED bool operator<(const K64V64& a,
const K64V64& b) { … }
static inline HWY_MAYBE_UNUSED bool operator>(const K64V64& a,
const K64V64& b) { … }
static inline HWY_MAYBE_UNUSED bool operator==(const K64V64& a,
const K64V64& b) { … }
static inline HWY_MAYBE_UNUSED bool operator<(const K32V32& a,
const K32V32& b) { … }
static inline HWY_MAYBE_UNUSED bool operator>(const K32V32& a,
const K32V32& b) { … }
static inline HWY_MAYBE_UNUSED bool operator==(const K32V32& a,
const K32V32& b) { … }
template <bool Condition>
struct EnableIfT { … };
template <>
struct EnableIfT<true> { … };
EnableIf;
template <typename T, typename U>
struct IsSameT { … };
IsSameT<T, T>;
template <typename T, typename U>
HWY_API constexpr bool IsSame() { … }
template <typename T, typename U1, typename U2>
HWY_API constexpr bool IsSameEither() { … }
template <bool Condition, typename Then, typename Else>
struct IfT { … };
IfT<false, Then, Else>;
If;
template <typename T>
struct IsConstT { … };
IsConstT<const T>;
template <typename T>
HWY_API constexpr bool IsConst() { … }
template <class T>
struct RemoveConstT { … };
RemoveConstT<const T>;
RemoveConst;
template <class T>
struct RemoveVolatileT { … };
RemoveVolatileT<volatile T>;
RemoveVolatile;
template <class T>
struct RemoveRefT { … };
RemoveRefT<T &>;
RemoveRefT<T &&>;
RemoveRef;
RemoveCvRef;
template <class T>
struct RemovePtrT { … };
RemovePtrT<T *>;
RemovePtrT<const T *>;
RemovePtrT<volatile T *>;
RemovePtrT<const volatile T *>;
RemovePtr;
#define HWY_IF_V_SIZE(T, kN, bytes) …
#define HWY_IF_V_SIZE_LE(T, kN, bytes) …
#define HWY_IF_V_SIZE_GT(T, kN, bytes) …
#define HWY_IF_LANES(kN, lanes) …
#define HWY_IF_LANES_LE(kN, lanes) …
#define HWY_IF_LANES_GT(kN, lanes) …
#define HWY_IF_UNSIGNED(T) …
#define HWY_IF_NOT_UNSIGNED(T) …
#define HWY_IF_SIGNED(T) …
#define HWY_IF_FLOAT(T) …
#define HWY_IF_NOT_FLOAT(T) …
#define HWY_IF_FLOAT3264(T) …
#define HWY_IF_NOT_FLOAT3264(T) …
#define HWY_IF_SPECIAL_FLOAT(T) …
#define HWY_IF_NOT_SPECIAL_FLOAT(T) …
#define HWY_IF_FLOAT_OR_SPECIAL(T) …
#define HWY_IF_NOT_FLOAT_NOR_SPECIAL(T) …
#define HWY_IF_INTEGER(T) …
#define HWY_IF_T_SIZE(T, bytes) …
#define HWY_IF_NOT_T_SIZE(T, bytes) …
#define HWY_IF_T_SIZE_ONE_OF(T, bit_array) …
#define HWY_IF_T_SIZE_LE(T, bytes) …
#define HWY_IF_T_SIZE_GT(T, bytes) …
#define HWY_IF_SAME(T, expected) …
#define HWY_IF_NOT_SAME(T, expected) …
#define HWY_IF_SAME2(T, expected1, expected2) …
#define HWY_IF_U8(T) …
#define HWY_IF_U16(T) …
#define HWY_IF_U32(T) …
#define HWY_IF_U64(T) …
#define HWY_IF_I8(T) …
#define HWY_IF_I16(T) …
#define HWY_IF_I32(T) …
#define HWY_IF_I64(T) …
#define HWY_IF_BF16(T) …
#define HWY_IF_NOT_BF16(T) …
#define HWY_IF_F16(T) …
#define HWY_IF_NOT_F16(T) …
#define HWY_IF_F32(T) …
#define HWY_IF_F64(T) …
#define HWY_IF_UI8(T) …
#define HWY_IF_UI16(T) …
#define HWY_IF_UI32(T) …
#define HWY_IF_UI64(T) …
#define HWY_IF_LANES_PER_BLOCK(T, N, LANES) …
template <size_t N>
struct SizeTag { … };
template <class T>
class DeclValT { … };
template <class T>
HWY_API typename DeclValT<T>::type DeclVal() noexcept { … }
template <class T>
struct IsArrayT { … };
IsArrayT<T[]>;
IsArrayT<T[N]>;
template <class T>
static constexpr bool IsArray() { … }
#if HWY_COMPILER_MSVC
HWY_DIAGNOSTICS(push)
HWY_DIAGNOSTICS_OFF(disable : 4180, ignored "-Wignored-qualifiers")
#endif
template <class From, class To>
class IsConvertibleT { … };
#if HWY_COMPILER_MSVC
HWY_DIAGNOSTICS(pop)
#endif
template <class From, class To>
HWY_API constexpr bool IsConvertible() { … }
template <class From, class To>
class IsStaticCastableT { … };
template <class From, class To>
static constexpr bool IsStaticCastable() { … }
#define HWY_IF_CASTABLE(From, To) …
#define HWY_IF_OP_CASTABLE(op, T, Native) …
template <class T, class From>
class IsAssignableT { … };
template <class T, class From>
static constexpr bool IsAssignable() { … }
#define HWY_IF_ASSIGNABLE(T, From) …
template <typename T>
HWY_API constexpr bool IsSpecialFloat() { … }
template <class T>
HWY_API constexpr bool IsIntegerLaneType() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<int8_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<uint8_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<int16_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<uint16_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<int32_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<uint32_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<int64_t>() { … }
template <>
HWY_INLINE constexpr bool IsIntegerLaneType<uint64_t>() { … }
template <class T>
HWY_API constexpr bool IsInteger() { … }
template <>
HWY_INLINE constexpr bool IsInteger<bool>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<char>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<signed char>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<unsigned char>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<short>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<unsigned short>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<int>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<unsigned>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<long>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<unsigned long>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<long long>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<unsigned long long>() { … }
#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
template <>
HWY_INLINE constexpr bool IsInteger<char8_t>() { … }
#endif
template <>
HWY_INLINE constexpr bool IsInteger<char16_t>() { … }
template <>
HWY_INLINE constexpr bool IsInteger<char32_t>() { … }
#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
#define HWY_BITCASTSCALAR_CONSTEXPR …
#else
#define HWY_BITCASTSCALAR_CONSTEXPR
#endif
// Selects the expansion of HWY_BITCASTSCALAR_CXX14_CONSTEXPR based on the
// __cpp_constexpr feature-test macro (201304L is the C++14 relaxed-constexpr
// value). The defined() guard matches the other feature-test checks in this
// header and avoids -Wundef diagnostics on compilers that do not provide the
// macro; an undefined macro selects the empty definition either way.
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
#define HWY_BITCASTSCALAR_CXX14_CONSTEXPR …
#else
#define HWY_BITCASTSCALAR_CXX14_CONSTEXPR
#endif
#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
namespace detail {
template <class From>
struct BitCastScalarSrcCastHelper { … };
#if HWY_COMPILER_CLANG >= 900 && HWY_COMPILER_CLANG < 1000
// Overload chosen when both To and From, after removing cv-qualifiers and
// references, are integer types according to hwy::IsInteger.
// Requires sizeof(To) == sizeof(From) (checked at compile time) and converts
// with a plain static_cast instead of __builtin_bit_cast.
// Only compiled when HWY_COMPILER_CLANG is in [900, 1000), per the enclosing
// #if.
// NOTE(review): presumably a workaround for a __builtin_bit_cast limitation in
// that compiler range - confirm.
// The body is kept to a static_assert plus a single return statement.
template <class To, class From,
hwy::EnableIf<hwy::IsInteger<RemoveCvRef<To>>() &&
hwy::IsInteger<RemoveCvRef<From>>()>* = nullptr>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR To
BuiltinBitCastScalar(const From& val) {
static_assert(sizeof(To) == sizeof(From),
"sizeof(To) == sizeof(From) must be true");
// Same-size integer-to-integer conversion.
return static_cast<To>(val);
}
// Overload chosen in every case the integer-to-integer overload rejects: the
// EnableIf condition is the exact negation of that overload's condition, so
// the two never both participate for the same (To, From) pair.
// Forwards directly to the compiler's __builtin_bit_cast.
template <class To, class From,
hwy::EnableIf<!(hwy::IsInteger<RemoveCvRef<To>>() &&
hwy::IsInteger<RemoveCvRef<From>>())>* = nullptr>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR To
BuiltinBitCastScalar(const From& val) {
return __builtin_bit_cast(To, val);
}
#endif
}
template <class To, class From, HWY_IF_NOT_SPECIAL_FLOAT(To)>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) { … }
template <class To, class From, HWY_IF_SPECIAL_FLOAT(To)>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) { … }
#else
// Fallback used when neither __builtin_bit_cast nor a sufficiently recent MSVC
// is available (the #else branch of the check above).
// Produces a To holding the same bytes as `val` by copying between the two
// objects via CopySameSize.
//   val: source object whose bytes are copied.
//   returns: a default-initialized To overwritten by the copy.
template <class To, class From>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From& val) {
  To out;  // deliberately not value-initialized; filled by the copy below
  CopySameSize(&val, &out);
  return out;
}
#endif
#pragma pack(push, 1)
#if (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC) || \
(HWY_COMPILER_CLANG && defined(__ARM_FP) && (__ARM_FP & 2)) || \
(HWY_COMPILER_GCC_ACTUAL && defined(__ARM_FP16_FORMAT_IEEE))
#define HWY_NEON_HAVE_F16C …
#else
#define HWY_NEON_HAVE_F16C …
#endif
#if HWY_ARCH_RISCV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600
#define HWY_RVV_HAVE_F16_VEC …
#else
#define HWY_RVV_HAVE_F16_VEC …
#endif
#if HWY_ARCH_X86 && defined(__SSE2__) && defined(__FLT16_MAX__) && \
((HWY_COMPILER_CLANG >= 1500 && !HWY_COMPILER_CLANGCL) || \
HWY_COMPILER_GCC_ACTUAL >= 1200)
#define HWY_SSE2_HAVE_F16_TYPE …
#else
#define HWY_SSE2_HAVE_F16_TYPE …
#endif
#ifndef HWY_HAVE_SCALAR_F16_TYPE
#if HWY_NEON_HAVE_F16C || HWY_RVV_HAVE_F16_VEC || HWY_SSE2_HAVE_F16_TYPE
#define HWY_HAVE_SCALAR_F16_TYPE …
#else
#define HWY_HAVE_SCALAR_F16_TYPE …
#endif
#endif
#ifndef HWY_HAVE_SCALAR_F16_OPERATORS
#if HWY_HAVE_SCALAR_F16_TYPE && \
(HWY_COMPILER_CLANG >= 1800 || HWY_COMPILER_GCC_ACTUAL >= 1200 || \
(HWY_COMPILER_CLANG >= 1500 && !HWY_COMPILER_CLANGCL && \
!defined(_WIN32)) || \
(HWY_ARCH_ARM && \
(HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800)))
#define HWY_HAVE_SCALAR_F16_OPERATORS …
#else
#define HWY_HAVE_SCALAR_F16_OPERATORS …
#endif
#endif
namespace detail {
template <class T, class TVal = RemoveCvRef<T>, bool = IsSpecialFloat<TVal>()>
struct SpecialFloatUnwrapArithOpOperandT { … };
SpecialFloatUnwrapArithOpOperandT<T, TVal, false>;
SpecialFloatUnwrapArithOpOperand;
// Primary template: `type` is T itself, unchanged.
// TVal defaults to T with cv-qualifiers and references removed. It is not used
// by the primary template, but gives partial specializations a normalized
// type to match against.
template <class T, class TVal = RemoveCvRef<T>>
struct NativeSpecialFloatToWrapperT {
using type = T;
};
NativeSpecialFloatToWrapper;
}
struct alignas(2) float16_t { … };
static_assert …;
#if HWY_HAVE_SCALAR_F16_TYPE
namespace detail {
#if HWY_HAVE_SCALAR_F16_OPERATORS
SpecialFloatUnwrapArithOpOperandT<T, hwy::float16_t, true>;
#endif
NativeSpecialFloatToWrapperT<T, hwy::float16_t::Native>;
}
#endif
#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
namespace detail {
template <>
struct BitCastScalarSrcCastHelper<hwy::float16_t> { … };
}
#endif
#if HWY_HAVE_SCALAR_F16_OPERATORS
#define HWY_F16_CONSTEXPR …
#else
#define HWY_F16_CONSTEXPR …
#endif
HWY_API HWY_F16_CONSTEXPR float F32FromF16(float16_t f16) { … }
#if HWY_IS_DEBUG_BUILD && \
(HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926)
#if defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L
#define HWY_F16_FROM_F32_DASSERT …
#elif HWY_HAS_BUILTIN(__builtin_is_constant_evaluated) || \
HWY_COMPILER_MSVC >= 1926
#define HWY_F16_FROM_F32_DASSERT(condition) …
#else
#define HWY_F16_FROM_F32_DASSERT …
#endif
#else
#define HWY_F16_FROM_F32_DASSERT …
#endif
HWY_API HWY_F16_CONSTEXPR float16_t F16FromF32(float f32) { … }
HWY_API HWY_F16_CONSTEXPR float16_t F16FromF64(double f64) { … }
HWY_F16_CONSTEXPR inline bool operator==(float16_t lhs,
float16_t rhs) noexcept { … }
HWY_F16_CONSTEXPR inline bool operator!=(float16_t lhs,
float16_t rhs) noexcept { … }
HWY_F16_CONSTEXPR inline bool operator<(float16_t lhs, float16_t rhs) noexcept { … }
HWY_F16_CONSTEXPR inline bool operator<=(float16_t lhs,
float16_t rhs) noexcept { … }
HWY_F16_CONSTEXPR inline bool operator>(float16_t lhs, float16_t rhs) noexcept { … }
HWY_F16_CONSTEXPR inline bool operator>=(float16_t lhs,
float16_t rhs) noexcept { … }
#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
HWY_F16_CONSTEXPR inline std::partial_ordering operator<=>(
float16_t lhs, float16_t rhs) noexcept { … }
#endif
#if HWY_ARCH_ARM_A64 && \
(HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400)
#define HWY_ARM_HAVE_SCALAR_BF16_TYPE …
#else
#define HWY_ARM_HAVE_SCALAR_BF16_TYPE …
#endif
#ifndef HWY_SSE2_HAVE_SCALAR_BF16_TYPE
#if HWY_ARCH_X86 && defined(__SSE2__) && \
((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL) || \
HWY_COMPILER_GCC_ACTUAL >= 1300)
#define HWY_SSE2_HAVE_SCALAR_BF16_TYPE …
#else
#define HWY_SSE2_HAVE_SCALAR_BF16_TYPE …
#endif
#endif
#if HWY_ARM_HAVE_SCALAR_BF16_TYPE || HWY_SSE2_HAVE_SCALAR_BF16_TYPE
#define HWY_HAVE_SCALAR_BF16_TYPE …
#else
#define HWY_HAVE_SCALAR_BF16_TYPE …
#endif
#ifndef HWY_HAVE_SCALAR_BF16_OPERATORS
#if HWY_HAVE_SCALAR_BF16_TYPE && (HWY_COMPILER_GCC_ACTUAL >= 1300)
#define HWY_HAVE_SCALAR_BF16_OPERATORS …
#else
#define HWY_HAVE_SCALAR_BF16_OPERATORS …
#endif
#endif
#if HWY_HAVE_SCALAR_BF16_OPERATORS
#define HWY_BF16_CONSTEXPR …
#else
#define HWY_BF16_CONSTEXPR …
#endif
struct alignas(2) bfloat16_t { … };
static_assert …;
#pragma pack(pop)
#if HWY_HAVE_SCALAR_BF16_TYPE
namespace detail {
#if HWY_HAVE_SCALAR_BF16_OPERATORS
// Partial specialization for operands whose normalized type (second template
// argument) is hwy::bfloat16_t and whose third argument is true: `type` is the
// nested hwy::bfloat16_t::Native type.
// Only compiled when both HWY_HAVE_SCALAR_BF16_TYPE and
// HWY_HAVE_SCALAR_BF16_OPERATORS are nonzero (see the enclosing #if blocks).
template <class T>
struct SpecialFloatUnwrapArithOpOperandT<T, hwy::bfloat16_t, true> {
using type = hwy::bfloat16_t::Native;
};
#endif
NativeSpecialFloatToWrapperT<T, hwy::bfloat16_t::Native>;
}
#endif
#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
namespace detail {
template <>
struct BitCastScalarSrcCastHelper<hwy::bfloat16_t> { … };
}
#endif
HWY_API HWY_BF16_CONSTEXPR float F32FromBF16(bfloat16_t bf) { … }
namespace detail {
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint32_t F32BitsToBF16RoundIncr(
const uint32_t f32_bits) { … }
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint16_t F32BitsToBF16Bits(
const uint32_t f32_bits) { … }
}
HWY_API HWY_BF16_CONSTEXPR bfloat16_t BF16FromF32(float f) { … }
HWY_API HWY_BF16_CONSTEXPR bfloat16_t BF16FromF64(double f64) { … }
HWY_BF16_CONSTEXPR inline bool operator==(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
HWY_BF16_CONSTEXPR inline bool operator!=(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
HWY_BF16_CONSTEXPR inline bool operator<(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
HWY_BF16_CONSTEXPR inline bool operator<=(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
HWY_BF16_CONSTEXPR inline bool operator>(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
HWY_BF16_CONSTEXPR inline bool operator>=(bfloat16_t lhs,
bfloat16_t rhs) noexcept { … }
#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
HWY_BF16_CONSTEXPR inline std::partial_ordering operator<=>(
bfloat16_t lhs, bfloat16_t rhs) noexcept { … }
#endif
namespace detail {
// Primary template is declared but intentionally left undefined here; only
// explicit specializations supply members. Users in this header read the
// members is_signed, is_float and is_bf16 (see TypeTag / IsFloatTag).
template <typename T>
struct Relations;
template <>
struct Relations<uint8_t> { … };
template <>
struct Relations<int8_t> { … };
template <>
struct Relations<uint16_t> { … };
template <>
struct Relations<int16_t> { … };
template <>
struct Relations<uint32_t> { … };
template <>
struct Relations<int32_t> { … };
template <>
struct Relations<uint64_t> { … };
template <>
struct Relations<int64_t> { … };
template <>
struct Relations<uint128_t> { … };
template <>
struct Relations<float16_t> { … };
template <>
struct Relations<bfloat16_t> { … };
template <>
struct Relations<float> { … };
template <>
struct Relations<double> { … };
// Primary template is declared but left undefined; explicit specializations
// are provided for N = 1, 2, 4, 8 and 16.
template <size_t N>
struct TypeFromSize;
template <>
struct TypeFromSize<1> { … };
template <>
struct TypeFromSize<2> { … };
template <>
struct TypeFromSize<4> { … };
template <>
struct TypeFromSize<8> { … };
template <>
struct TypeFromSize<16> { … };
}
MakeUnsigned;
MakeSigned;
MakeFloat;
MakeWide;
MakeNarrow;
UnsignedFromSize;
SignedFromSize;
FloatFromSize;
UnsignedTag;
SignedTag;
FloatTag;
SpecialTag;
template <typename T, class R = detail::Relations<T>>
constexpr auto TypeTag()
-> hwy::SizeTag<((R::is_signed + R::is_float + R::is_bf16) << 8)> { … }
NonFloatTag;
template <typename T, class R = detail::Relations<T>>
constexpr auto IsFloatTag() -> hwy::SizeTag<(R::is_float ? 0x200 : 0x400)> { … }
template <typename T>
HWY_API constexpr bool IsFloat3264() { … }
template <typename T>
HWY_API constexpr bool IsFloat() { … }
template <typename T>
HWY_API constexpr bool IsSigned() { … }
template <>
constexpr bool IsSigned<float16_t>() { … }
template <>
constexpr bool IsSigned<bfloat16_t>() { … }
template <>
constexpr bool IsSigned<hwy::uint128_t>() { … }
template <>
constexpr bool IsSigned<hwy::K64V64>() { … }
template <>
constexpr bool IsSigned<hwy::K32V32>() { … }
template <typename T, bool = IsInteger<T>() && !IsIntegerLaneType<T>()>
struct MakeLaneTypeIfIntegerT { … };
MakeLaneTypeIfIntegerT<T, true>;
MakeLaneTypeIfInteger;
template <typename T>
HWY_API constexpr T LimitsMax() { … }
template <typename T>
HWY_API constexpr T LimitsMin() { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T LowestValue() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bfloat16_t LowestValue<bfloat16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float16_t LowestValue<float16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float LowestValue<float>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR double LowestValue<double>() { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T HighestValue() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bfloat16_t HighestValue<bfloat16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float16_t HighestValue<float16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float HighestValue<float>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR double HighestValue<double>() { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T Epsilon() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bfloat16_t Epsilon<bfloat16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float16_t Epsilon<float16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float Epsilon<float>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR double Epsilon<double>() { … }
template <typename T>
constexpr int MantissaBits() { … }
template <>
constexpr int MantissaBits<bfloat16_t>() { … }
template <>
constexpr int MantissaBits<float16_t>() { … }
template <>
constexpr int MantissaBits<float>() { … }
template <>
constexpr int MantissaBits<double>() { … }
template <typename T>
constexpr MakeSigned<T> MaxExponentTimes2() { … }
template <typename T>
constexpr MakeUnsigned<T> SignMask() { … }
template <typename T>
constexpr MakeUnsigned<T> ExponentMask() { … }
template <typename T>
constexpr MakeUnsigned<T> MantissaMask() { … }
template <typename T>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T MantissaEnd() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bfloat16_t MantissaEnd<bfloat16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float16_t MantissaEnd<float16_t>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR float MantissaEnd<float>() { … }
template <>
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR double MantissaEnd<double>() { … }
template <typename T>
constexpr int ExponentBits() { … }
template <typename T>
constexpr MakeSigned<T> MaxExponentField() { … }
#if HWY_HAVE_SCALAR_F16_OPERATORS || HWY_HAVE_SCALAR_BF16_OPERATORS
#define HWY_RHS_SPECIAL_FLOAT_ARITH_OP …
#define HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP …
#if HWY_HAVE_SCALAR_F16_OPERATORS
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(+, operator+, float16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(-, operator-, float16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(*, operator*, float16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(/, operator/, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(==, operator==, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(!=, operator!=, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<, operator<, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=, operator<=, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>, operator>, float16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>=, operator>=, float16_t)
#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=>, operator<=>, float16_t)
#endif
#endif
#if HWY_HAVE_SCALAR_BF16_OPERATORS
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(+, operator+, bfloat16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(-, operator-, bfloat16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(*, operator*, bfloat16_t)
HWY_RHS_SPECIAL_FLOAT_ARITH_OP(/, operator/, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(==, operator==, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(!=, operator!=, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<, operator<, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=, operator<=, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>, operator>, bfloat16_t)
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>=, operator>=, bfloat16_t)
#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=>, operator<=>, bfloat16_t)
#endif
#endif
#undef HWY_RHS_SPECIAL_FLOAT_ARITH_OP
#undef HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP
#endif
HWY_API float F32FromF16Mem(const void* ptr) { … }
HWY_API float F32FromBF16Mem(const void* ptr) { … }
#if HWY_HAVE_SCALAR_F16_OPERATORS
#define HWY_BF16_TO_F16_CONSTEXPR …
#else
#define HWY_BF16_TO_F16_CONSTEXPR …
#endif
template <typename TTo, typename TFrom, HWY_IF_NOT_SPECIAL_FLOAT(TTo),
HWY_IF_NOT_SPECIAL_FLOAT(TFrom), HWY_IF_NOT_SAME(TTo, TFrom)>
HWY_API constexpr TTo ConvertScalarTo(const TFrom in) { … }
template <typename TTo, typename TFrom, HWY_IF_F16(TTo),
HWY_IF_NOT_SPECIAL_FLOAT(TFrom), HWY_IF_NOT_SAME(TFrom, double)>
HWY_API constexpr TTo ConvertScalarTo(const TFrom in) { … }
template <typename TTo, HWY_IF_F16(TTo)>
HWY_API HWY_BF16_TO_F16_CONSTEXPR TTo
ConvertScalarTo(const hwy::bfloat16_t in) { … }
template <typename TTo, HWY_IF_F16(TTo)>
HWY_API HWY_F16_CONSTEXPR TTo ConvertScalarTo(const double in) { … }
template <typename TTo, typename TFrom, HWY_IF_BF16(TTo),
HWY_IF_NOT_SPECIAL_FLOAT(TFrom), HWY_IF_NOT_SAME(TFrom, double)>
HWY_API HWY_BF16_CONSTEXPR TTo ConvertScalarTo(const TFrom in) { … }
template <typename TTo, HWY_IF_BF16(TTo)>
HWY_API HWY_BF16_TO_F16_CONSTEXPR TTo ConvertScalarTo(const hwy::float16_t in) { … }
template <typename TTo, HWY_IF_BF16(TTo)>
HWY_API HWY_BF16_CONSTEXPR TTo ConvertScalarTo(const double in) { … }
template <typename TTo, typename TFrom, HWY_IF_F16(TFrom),
HWY_IF_NOT_SPECIAL_FLOAT(TTo)>
HWY_API HWY_F16_CONSTEXPR TTo ConvertScalarTo(const TFrom in) { … }
template <typename TTo, typename TFrom, HWY_IF_BF16(TFrom),
HWY_IF_NOT_SPECIAL_FLOAT(TTo)>
HWY_API HWY_BF16_CONSTEXPR TTo ConvertScalarTo(TFrom in) { … }
template <typename TTo>
HWY_API constexpr TTo ConvertScalarTo(TTo in) { … }
template <typename T1, typename T2>
constexpr inline T1 DivCeil(T1 a, T2 b) { … }
constexpr inline size_t RoundUpTo(size_t what, size_t align) { … }
constexpr inline size_t RoundDownTo(size_t what, size_t align) { … }
namespace detail {
template <class T>
static HWY_INLINE constexpr T ScalarShr(hwy::UnsignedTag , T val,
int shift_amt) { … }
template <class T>
static HWY_INLINE constexpr T ScalarShr(hwy::SignedTag , T val,
int shift_amt) { … }
}
template <class T, HWY_IF_INTEGER(RemoveCvRef<T>)>
HWY_API constexpr RemoveCvRef<T> ScalarShr(T val, int shift_amt) { … }
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) { … }
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x) { … }
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x) { … }
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x) { … }
template <class T, HWY_IF_INTEGER(RemoveCvRef<T>),
HWY_IF_T_SIZE_ONE_OF(RemoveCvRef<T>, (1 << 1) | (1 << 2) | (1 << 4))>
HWY_API size_t PopCount(T x) { … }
template <class T, HWY_IF_INTEGER(RemoveCvRef<T>),
HWY_IF_T_SIZE(RemoveCvRef<T>, 8)>
HWY_API size_t PopCount(T x) { … }
template <typename TI>
constexpr size_t FloorLog2(TI x) { … }
template <typename TI>
constexpr size_t CeilLog2(TI x) { … }
template <typename T, typename T2, HWY_IF_FLOAT(T), HWY_IF_NOT_SPECIAL_FLOAT(T)>
HWY_INLINE constexpr T AddWithWraparound(T t, T2 increment) { … }
template <typename T, typename T2, HWY_IF_SPECIAL_FLOAT(T)>
HWY_INLINE constexpr T AddWithWraparound(T t, T2 increment) { … }
template <typename T, typename T2, HWY_IF_NOT_FLOAT(T)>
HWY_INLINE constexpr T AddWithWraparound(T t, T2 n) { … }
#if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
#pragma intrinsic(_mul128)
#pragma intrinsic(_umul128)
#endif
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t* HWY_RESTRICT upper) { … }
HWY_API int64_t Mul128(int64_t a, int64_t b, int64_t* HWY_RESTRICT upper) { … }
class Divisor { … };
namespace detail {
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T ScalarAbs(hwy::FloatTag ,
T val) { … }
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T
ScalarAbs(hwy::SpecialTag , T val) { … }
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T
ScalarAbs(hwy::SignedTag , T val) { … }
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T
ScalarAbs(hwy::UnsignedTag , T val) { … }
}
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR RemoveCvRef<T> ScalarAbs(T val) { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsNaN(T val) { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsInf(T val) { … }
namespace detail {
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsFinite(
hwy::FloatTag , T val) { … }
template <typename T>
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsFinite(
hwy::NonFloatTag , T ) { … }
}
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsFinite(T val) { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR RemoveCvRef<T> ScalarCopySign(T magn,
T sign) { … }
template <typename T>
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarSignBit(T val) { … }
#if HWY_ARCH_PPC && (HWY_COMPILER_GCC || HWY_COMPILER_CLANG) && \
!defined(_SOFT_FLOAT)
// PowerPC hard-float overload for T accepted by HWY_IF_F32.
// The asm statement has an empty template: it lists `output` as a read-write
// operand ("+f") and declares a "memory" clobber.
// NOTE(review): "f" is expected to be the PowerPC floating-point register
// constraint - confirm against the GCC machine-constraint documentation.
template <class T, HWY_IF_F32(T)>
HWY_API void PreventElision(T&& output) {
asm volatile("" : "+f"(output)::"memory");
}
// PowerPC hard-float overload for T accepted by HWY_IF_F64.
// The asm statement has an empty template: it lists `output` as a read-write
// operand ("+d") and declares a "memory" clobber.
// NOTE(review): "d" is expected to be a PowerPC floating-point register
// constraint suitable for double - confirm against the GCC machine-constraint
// documentation.
template <class T, HWY_IF_F64(T)>
HWY_API void PreventElision(T&& output) {
asm volatile("" : "+d"(output)::"memory");
}
// PowerPC overload for every T accepted by HWY_IF_NOT_FLOAT3264, i.e. the
// types not handled by the F32 and F64 overloads.
// The asm statement has an empty template: it lists `output` as a read-write
// operand with the general-register constraint ("+r") and declares a "memory"
// clobber.
template <class T, HWY_IF_NOT_FLOAT3264(T)>
HWY_API void PreventElision(T&& output) {
asm volatile("" : "+r"(output)::"memory");
}
#else
template <class T>
HWY_API void PreventElision(T&& output) { … }
#endif
}
#endif