#ifndef SKVX_DEFINED
#define SKVX_DEFINED
#include "include/private/base/SkFeatures.h"
#include "src/base/SkUtils.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <type_traits>
#include <utility>
#if !defined(SKNX_NO_SIMD)
#define SKVX_USE_SIMD …
#else
#define SKVX_USE_SIMD …
#endif
#if SKVX_USE_SIMD
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
#include <immintrin.h>
#elif defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#elif defined(__wasm_simd128__)
#include <wasm_simd128.h>
#elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
#include <lasxintrin.h>
#include <lsxintrin.h>
#elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX
#include <lsxintrin.h>
#endif
#endif
#if defined(_MSC_VER)
#define SKVX_ALWAYS_INLINE …
#else
#define SKVX_ALWAYS_INLINE …
#endif
#define SI …
#define SIT …
#define SIN …
#define SINT …
#define SINTU …
namespace skvx {
// Forward declaration of the N-lane vector of T. The alignas ties the alignment of the
// type to its total payload size, N*sizeof(T) bytes.
template <int N, typename T>
struct alignas(N*sizeof(T)) Vec;
// Forward declaration of shuffle<Ix...>(): takes an N-lane vector and returns a vector of the
// same element type with one lane per index in Ix (i.e. sizeof...(Ix) lanes).
// NOTE(review): presumably result lane i is input lane Ix[i]; the definition later in this
// file is the authority -- confirm there.
template <int... Ix, int N, typename T>
SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&);
template <int N, typename T>
struct alignas(N*sizeof(T)) Vec { … };
Vec<4, T>;
Vec<2, T>;
Vec<1, T>;
template <typename T> struct Mask { … };
template <> struct Mask<float > { … };
template <> struct Mask<double> { … };
M;
SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) { … }
#if SKVX_USE_SIMD && (defined(__clang__) || defined(__GNUC__))
#if defined(__clang__)
VExt __attribute__((ext_vector_type(N)));
#elif defined(__GNUC__)
// GCC branch: builds the compiler vector-extension type for N lanes of T.
// The vector_size attribute takes a size in bytes, hence N*sizeof(T). The typedef is wrapped
// in a helper struct so that VExt<N,T> below can be exposed as an alias template.
template <int N, typename T>
struct VExtHelper {
typedef T __attribute__((vector_size(N*sizeof(T)))) type;
};
// VExt<N,T>: the compiler vector type with N lanes of T (GCC spelling).
template <int N, typename T>
using VExt = typename VExtHelper<N,T>::type;
// Non-template to_vec() overload for the 4 x float case; only compiled in the GCC branch.
// Reinterprets the bits of the compiler vector as a Vec<4,float>.
// NOTE(review): presumably a workaround for GCC having trouble with the generic
// to_vec<N,T>() for this instantiation -- confirm before removing.
SI Vec<4,float> to_vec(VExt<4,float> v) {
    Vec<4,float> result = sk_bit_cast<Vec<4,float>>(v);
    return result;
}
#endif
SINT VExt<N,T> to_vext(const Vec<N,T>& v) { … }
SINT Vec <N,T> to_vec(const VExt<N,T>& v) { … }
SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> operator!(const Vec<N,T>& x) { … }
SINT Vec<N,T> operator-(const Vec<N,T>& x) { … }
SINT Vec<N,T> operator~(const Vec<N,T>& x) { … }
SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { … }
SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { … }
SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { … }
#else
// Single-lane arithmetic (fallback path without compiler vector extensions).
// Each operator applies the built-in scalar operator to the two lane values; the result is
// converted back to Vec<1,T> on return.
SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto sum = x.val + y.val;
    return sum;
}
SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto difference = x.val - y.val;
    return difference;
}
SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto product = x.val * y.val;
    return product;
}
SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto quotient = x.val / y.val;
    return quotient;
}
// Single-lane bitwise operators (fallback path): xor, and, or of the two lane values.
SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto bits = x.val ^ y.val;
    return bits;
}
SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto bits = x.val & y.val;
    return bits;
}
SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) {
    const auto bits = x.val | y.val;
    return bits;
}
// Single-lane unary operators (fallback path).
// operator! yields the scalar logical-not of the lane (a bool) converted back to T;
// operator- and operator~ are arithmetic negation and bitwise complement of the lane.
SIT Vec<1,T> operator!(const Vec<1,T>& x) {
    const auto logical_not = !x.val;
    return logical_not;
}
SIT Vec<1,T> operator-(const Vec<1,T>& x) {
    const auto negated = -x.val;
    return negated;
}
SIT Vec<1,T> operator~(const Vec<1,T>& x) {
    const auto complement = ~x.val;
    return complement;
}
// Single-lane shifts (fallback path): shift the lane value left/right by k bits using the
// built-in scalar shift operators.
SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) {
    const auto shifted = x.val << k;
    return shifted;
}
SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) {
    const auto shifted = x.val >> k;
    return shifted;
}
// Single-lane comparisons (fallback path).
// Each returns a one-lane mask of type M<T>: ~0 (all bits set in an int, then converted to
// M<T>) when the scalar comparison holds, 0 otherwise.
SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val == y.val) {
        return ~0;
    }
    return 0;
}
SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val != y.val) {
        return ~0;
    }
    return 0;
}
SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val <= y.val) {
        return ~0;
    }
    return 0;
}
SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val >= y.val) {
        return ~0;
    }
    return 0;
}
SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val < y.val) {
        return ~0;
    }
    return 0;
}
SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) {
    if (x.val > y.val) {
        return ~0;
    }
    return 0;
}
// Multi-lane arithmetic (fallback path): apply the operator to the low halves and to the
// high halves independently, then join() the two half-width results into the full vector.
SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo + y.lo;
    const auto hi_part = x.hi + y.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo - y.lo;
    const auto hi_part = x.hi - y.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo * y.lo;
    const auto hi_part = x.hi * y.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo / y.lo;
    const auto hi_part = x.hi / y.hi;
    return join(lo_part, hi_part);
}
// Multi-lane bitwise operators (fallback path): xor / and / or computed separately on the
// low and high halves, then recombined with join().
SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo ^ y.lo;
    const auto hi_part = x.hi ^ y.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo & y.lo;
    const auto hi_part = x.hi & y.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_part = x.lo | y.lo;
    const auto hi_part = x.hi | y.hi;
    return join(lo_part, hi_part);
}
// Multi-lane unary operators (fallback path): apply the operator to each half of the vector
// and join() the results.
SINT Vec<N,T> operator!(const Vec<N,T>& x) {
    const auto lo_part = !x.lo;
    const auto hi_part = !x.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator-(const Vec<N,T>& x) {
    const auto lo_part = -x.lo;
    const auto hi_part = -x.hi;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator~(const Vec<N,T>& x) {
    const auto lo_part = ~x.lo;
    const auto hi_part = ~x.hi;
    return join(lo_part, hi_part);
}
// Multi-lane shifts (fallback path): shift both halves by the same bit count k, then join().
SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) {
    const auto lo_part = x.lo << k;
    const auto hi_part = x.hi << k;
    return join(lo_part, hi_part);
}
SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) {
    const auto lo_part = x.lo >> k;
    const auto hi_part = x.hi >> k;
    return join(lo_part, hi_part);
}
// Multi-lane comparisons (fallback path): compare the low halves and the high halves
// separately, then join() the two half-width masks into a Vec<N,M<T>>.
SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo == y.lo;
    const auto hi_mask = x.hi == y.hi;
    return join(lo_mask, hi_mask);
}
SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo != y.lo;
    const auto hi_mask = x.hi != y.hi;
    return join(lo_mask, hi_mask);
}
SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo <= y.lo;
    const auto hi_mask = x.hi <= y.hi;
    return join(lo_mask, hi_mask);
}
SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo >= y.lo;
    const auto hi_mask = x.hi >= y.hi;
    return join(lo_mask, hi_mask);
}
SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo < y.lo;
    const auto hi_mask = x.hi < y.hi;
    return join(lo_mask, hi_mask);
}
SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) {
    const auto lo_mask = x.lo > y.lo;
    const auto hi_mask = x.hi > y.hi;
    return join(lo_mask, hi_mask);
}
#endif
SINTU Vec<N,T> operator+ (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator- (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator* (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator/ (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator^ (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator& (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator| (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> operator+ (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator- (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator* (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator/ (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator^ (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator& (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> operator| (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { … }
SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { … }
SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { … }
SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { … }
SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { … }
SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { … }
SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { … }
SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { … }
SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) { … }
SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { … }
SIT bool any(const Vec<1,T>& x) { … }
SINT bool any(const Vec<N,T>& x) { … }
SIT bool all(const Vec<1,T>& x) { … }
SINT bool all(const Vec<N,T>& x) { … }
template <typename D, typename S>
SI Vec<1,D> cast(const Vec<1,S>& src) { … }
template <typename D, int N, typename S>
SI Vec<N,D> cast(const Vec<N,S>& src) { … }
SIT T min(const Vec<1,T>& x) { … }
SIT T max(const Vec<1,T>& x) { … }
SINT T min(const Vec<N,T>& x) { … }
SINT T max(const Vec<N,T>& x) { … }
SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { … }
SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { … }
SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { … }
SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) { … }
template <int... Ix, int N, typename T>
SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) { … }
template <typename Fn, typename... Args, size_t... I>
SI auto map(std::index_sequence<I...>,
Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> { … }
template <typename Fn, int N, typename T, typename... Rest>
auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) { … }
SIN Vec<N,float> ceil(const Vec<N,float>& x) { … }
SIN Vec<N,float> floor(const Vec<N,float>& x) { … }
SIN Vec<N,float> trunc(const Vec<N,float>& x) { … }
SIN Vec<N,float> round(const Vec<N,float>& x) { … }
SIN Vec<N,float> sqrt(const Vec<N,float>& x) { … }
SIN Vec<N,float> abs(const Vec<N,float>& x) { … }
SIN Vec<N,float> fma(const Vec<N,float>& x,
const Vec<N,float>& y,
const Vec<N,float>& z) { … }
SI Vec<1,int> lrint(const Vec<1,float>& x) { … }
SIN Vec<N,int> lrint(const Vec<N,float>& x) { … }
SIN Vec<N,float> fract(const Vec<N,float>& x) { … }
SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) { … }
SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) { … }
SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) { … }
SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) { … }
SINT std::enable_if_t<std::is_unsigned_v<T>, Vec<N,T>> saturated_add(const Vec<N,T>& x,
const Vec<N,T>& y) { … }
class ScaledDividerU32 { … };
SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x,
const Vec<N,uint8_t>& y) { … }
SIN Vec<N,uint32_t> mull(const Vec<N,uint16_t>& x,
const Vec<N,uint16_t>& y) { … }
SIN Vec<N,uint16_t> mulhi(const Vec<N,uint16_t>& x,
const Vec<N,uint16_t>& y) { … }
SINT T dot(const Vec<N, T>& a, const Vec<N, T>& b) { … }
SIT T cross(const Vec<2, T>& a, const Vec<2, T>& b) { … }
SIN float length(const Vec<N, float>& v) { … }
SIN double length(const Vec<N, double>& v) { … }
SIN Vec<N, float> normalize(const Vec<N, float>& v) { … }
SIN Vec<N, double> normalize(const Vec<N, double>& v) { … }
SINT bool isfinite(const Vec<N, T>& v) { … }
SIT void strided_load4(const T* v,
Vec<1,T>& a,
Vec<1,T>& b,
Vec<1,T>& c,
Vec<1,T>& d) { … }
SINT void strided_load4(const T* v,
Vec<N,T>& a,
Vec<N,T>& b,
Vec<N,T>& c,
Vec<N,T>& d) { … }
#if SKVX_USE_SIMD && defined(SK_ARM_HAS_NEON)
#define IMPL_LOAD4_TRANSPOSED …
IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32)
IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16)
IMPL_LOAD4_TRANSPOSED(8, uint8_t, vld4_u8)
IMPL_LOAD4_TRANSPOSED(2, int32_t, vld4_s32)
IMPL_LOAD4_TRANSPOSED(4, int16_t, vld4_s16)
IMPL_LOAD4_TRANSPOSED(8, int8_t, vld4_s8)
IMPL_LOAD4_TRANSPOSED(2, float, vld4_f32)
IMPL_LOAD4_TRANSPOSED(4, uint32_t, vld4q_u32)
IMPL_LOAD4_TRANSPOSED(8, uint16_t, vld4q_u16)
IMPL_LOAD4_TRANSPOSED(16, uint8_t, vld4q_u8)
IMPL_LOAD4_TRANSPOSED(4, int32_t, vld4q_s32)
IMPL_LOAD4_TRANSPOSED(8, int16_t, vld4q_s16)
IMPL_LOAD4_TRANSPOSED(16, int8_t, vld4q_s8)
IMPL_LOAD4_TRANSPOSED(4, float, vld4q_f32)
#undef IMPL_LOAD4_TRANSPOSED
#elif SKVX_USE_SIMD && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
SI void strided_load4(const float* v,
Vec<4,float>& a,
Vec<4,float>& b,
Vec<4,float>& c,
Vec<4,float>& d) { … }
// LSX specialization of strided_load4. The guard tests SK_CPU_LSX_LEVEL, the same macro that
// gates the <lsxintrin.h> include at the top of this header. A misspelled (undefined) macro
// name in a preprocessor condition silently evaluates to 0, so it must match exactly.
#elif SKVX_USE_SIMD && SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX
#define _LSX_TRANSPOSE4 …
// LSX specialization of strided_load4 for four Vec<4,int> outputs.
// Reads four 128-bit values starting at v, runs them through _LSX_TRANSPOSE4, and stores the
// results into a, b, c and d.
// NOTE(review): presumably this leaves a = {v[0],v[4],v[8],v[12]}, b = {v[1],v[5],...}, etc.
// (a 4x4 transpose); the macro body is not shown here -- confirm against its definition.
SI void strided_load4(const int* v,
Vec<4,int>& a,
Vec<4,int>& b,
Vec<4,int>& c,
Vec<4,int>& d) {
// Four consecutive loads; the second argument steps by 16 per load.
// NOTE(review): assumed to be a byte offset (16 bytes = four 32-bit ints) -- confirm.
__m128i a_ = __lsx_vld(v, 0);
__m128i b_ = __lsx_vld(v, 16);
__m128i c_ = __lsx_vld(v, 32);
__m128i d_ = __lsx_vld(v, 48);
// The macro is invoked as a statement on the four locals, so it is expected to modify them
// in place.
_LSX_TRANSPOSE4(a_, b_, c_, d_);
// Reinterpret each 128-bit register as four ints and hand it back through the out-params.
a = sk_bit_cast<Vec<4,int>>(a_);
b = sk_bit_cast<Vec<4,int>>(b_);
c = sk_bit_cast<Vec<4,int>>(c_);
d = sk_bit_cast<Vec<4,int>>(d_);
}
#endif
SIT void strided_load2(const T* v, Vec<1,T>& a, Vec<1,T>& b) { … }
SINT void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { … }
#if SKVX_USE_SIMD && defined(SK_ARM_HAS_NEON)
#define IMPL_LOAD2_TRANSPOSED …
IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32)
IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16)
IMPL_LOAD2_TRANSPOSED(8, uint8_t, vld2_u8)
IMPL_LOAD2_TRANSPOSED(2, int32_t, vld2_s32)
IMPL_LOAD2_TRANSPOSED(4, int16_t, vld2_s16)
IMPL_LOAD2_TRANSPOSED(8, int8_t, vld2_s8)
IMPL_LOAD2_TRANSPOSED(2, float, vld2_f32)
IMPL_LOAD2_TRANSPOSED(4, uint32_t, vld2q_u32)
IMPL_LOAD2_TRANSPOSED(8, uint16_t, vld2q_u16)
IMPL_LOAD2_TRANSPOSED(16, uint8_t, vld2q_u8)
IMPL_LOAD2_TRANSPOSED(4, int32_t, vld2q_s32)
IMPL_LOAD2_TRANSPOSED(8, int16_t, vld2q_s16)
IMPL_LOAD2_TRANSPOSED(16, int8_t, vld2q_s8)
IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32)
#undef IMPL_LOAD2_TRANSPOSED
#endif
float2;
float4;
float8;
double2;
double4;
double8;
byte2;
byte4;
byte8;
byte16;
int2;
int4;
int8;
ushort2;
ushort4;
ushort8;
uint2;
uint4;
uint8;
long2;
long4;
long8;
half2;
half4;
half8;
}
#undef SINTU
#undef SINT
#undef SIN
#undef SIT
#undef SI
#undef SKVX_ALWAYS_INLINE
#undef SKVX_USE_SIMD
#endif