// Short aliases for V<T> instantiated with each element type used in this file.
// V itself is declared outside this section.
using F = V<float>;      // float lanes
using I32 = V<int32_t>;  // signed 32-bit lanes
using U64 = V<uint64_t>; // unsigned 64-bit lanes
using U32 = V<uint32_t>; // unsigned 32-bit lanes
using U16 = V<uint16_t>; // unsigned 16-bit lanes
using U8 = V<uint8_t>;   // unsigned 8-bit lanes
// Compile-time constants of type F: F0 (0.0f), F1 (1.0f) and FInfBits (0x7f800000).
//
// Under GCC proper (not clang) each constant is spelled as a value-initialized F()
// plus the scalar; every other compiler initializes F directly from the scalar.
// NOTE(review): presumably GCC rejects direct scalar -> vector initialization in a
// constexpr context — confirm before unifying the two branches.
//
// 0x7f800000 (2139095040) is the IEEE-754 binary32 bit pattern of +infinity. The
// literal is an integer that is value-converted to float here, not bit-reinterpreted,
// so FInfBits holds the number 2139095040.0f.
#if defined(__GNUC__) && !defined(__clang__)
static constexpr F F0 = F() + 0.0f,
F1 = F() + 1.0f,
FInfBits = F() + 0x7f800000;
#else
static constexpr F F0 = 0.0f,
F1 = 1.0f,
FInfBits = 0x7f800000;
#endif
// x86 feature selection. Each USING_* macro is only defined if it was not already
// defined (e.g. by the including translation unit or the build system).

// AVX: requires N == 8 and compiler AVX support.
#if !defined(USING_AVX) && N == 8 && defined(__AVX__)
#define USING_AVX
#endif
// F16C half-float conversions: only considered once USING_AVX is set.
#if !defined(USING_AVX_F16C) && defined(USING_AVX) && defined(__F16C__)
#define USING_AVX_F16C
#endif
// AVX2: likewise layered on top of USING_AVX.
#if !defined(USING_AVX2) && defined(USING_AVX) && defined(__AVX2__)
#define USING_AVX2
#endif
// AVX-512: requires N == 16 and both the F and DQ subsets; independent of USING_AVX.
#if !defined(USING_AVX512F) && N == 16 && defined(__AVX512F__) && defined(__AVX512DQ__)
#define USING_AVX512F
#endif
// ARM NEON feature selection: NEON is used for any N > 1 when the compiler
// advertises __ARM_NEON. Unlike the x86 macros, these are defined unconditionally
// (there is no !defined(...) guard).
#if N > 1 && defined(__ARM_NEON)
#define USING_NEON
// Half-precision support is detected differently per compiler.
#if defined(__clang__)
// clang: test bit 1 of __ARM_FP (per ACLE, hardware half-precision support).
#if __ARM_FP & 2
#define USING_NEON_F16C
#endif
#elif defined(__GNUC__)
// GCC: test for the IEEE half-precision format macro instead.
#if defined(__ARM_FP16_FORMAT_IEEE)
#define USING_NEON_F16C
#endif
#endif
#endif
// With NEON under clang, disable -Wvector-conversion from this point on (no matching
// push/pop is visible in this section).
// NOTE(review): presumably required by the C-style casts between V<T> and the NEON
// vector types (e.g. in swap_endian_16, min_, max_) — confirm.
#if defined(USING_NEON) && defined(__clang__)
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
// On SSE targets with a GNU-compatible compiler, silence -Wpsabi.
// If the compiler has no __has_warning, the pragma is issued unconditionally;
// if it does, the pragma is issued only when the compiler recognizes "-Wpsabi",
// which avoids an unknown-warning diagnostic from the pragma itself.
#if defined(__SSE__) && defined(__GNUC__)
#if !defined(__has_warning)
#pragma GCC diagnostic ignored "-Wpsabi"
#elif __has_warning("-Wpsabi")
#pragma GCC diagnostic ignored "-Wpsabi"
#endif
#endif
#if defined(__clang__) || defined(__GNUC__)
#define SI …
#else
#define SI …
#endif
template <typename T, typename P>
SI T load(const P* ptr) { … }
template <typename T, typename P>
SI void store(P* ptr, const T& val) { … }
template <typename D, typename S>
SI D cast(const S& v) { … }
template <typename D, typename S>
SI D bit_pun(const S& v) { … }
SI U32 to_fixed(F f) { … }
// if_then_else has two forms: a preprocessor macro when N == 1, and a function
// template over a condition type C and a value type T otherwise. Because the
// N == 1 form is a macro, it does not obey scoping or overload rules and cannot
// have its address taken.
#if N == 1
#define if_then_else …
#else
template <typename C, typename T>
SI T if_then_else(C cond, T t, T e) { … }
#endif
SI F F_from_Half(U16 half) { … }
// Half_from_F takes an F and returns a U16.
// Under clang the unsigned-integer-overflow sanitizer is disabled for this one function.
// NOTE(review): presumably the conversion relies on wrapping unsigned arithmetic — confirm.
#if defined(__clang__)
__attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
SI U16 Half_from_F(F f) { … }
#if defined(USING_NEON)
// NEON byte swap: reinterprets `v` as eight bytes, reverses the byte order inside
// each 16-bit element with vrev16_u8, and reinterprets the result back as U16.
SI U16 swap_endian_16(U16 v) {
    const uint8x8_t bytes   = (uint8x8_t) v;
    const uint8x8_t swapped = vrev16_u8(bytes);
    return (U16) swapped;
}
#endif
SI U64 swap_endian_16x4(const U64& rgba) { … }
#if defined(USING_NEON)
// NEON lane-wise minimum of two F vectors via vminq_f32.
SI F min_(F x, F y) {
    const float32x4_t lhs = (float32x4_t)x;
    const float32x4_t rhs = (float32x4_t)y;
    return (F)vminq_f32(lhs, rhs);
}
// NEON lane-wise maximum of two F vectors via vmaxq_f32.
SI F max_(F x, F y) {
    const float32x4_t lhs = (float32x4_t)x;
    const float32x4_t rhs = (float32x4_t)y;
    return (F)vmaxq_f32(lhs, rhs);
}
#elif defined(__loongarch_sx)
// LoongArch LSX minimum of two F vectors via __lsx_vfmin_s.
SI F min_(F x, F y) {
    const F smaller = (F)__lsx_vfmin_s(x, y);
    return smaller;
}
// LoongArch LSX maximum of two F vectors via __lsx_vfmax_s.
SI F max_(F x, F y) {
    const F larger = (F)__lsx_vfmax_s(x, y);
    return larger;
}
#else
SI F min_(F x, F y) { … }
SI F max_(F x, F y) { … }
#endif
SI F floor_(F x) { … }
SI F approx_log2(F x) { … }
SI F approx_log(F x) { … }
SI F approx_exp2(F x) { … }
SI F approx_pow(F x, float y) { … }
SI F approx_exp(F x) { … }
SI F strip_sign(F x, U32* sign) { … }
SI F apply_sign(F x, U32 sign) { … }
SI F apply_tf(const skcms_TransferFunction* tf, F x) { … }
SI F apply_gamma(const skcms_TransferFunction* tf, F x) { … }
SI F apply_pq(const skcms_TransferFunction* tf, F x) { … }
SI F apply_hlg(const skcms_TransferFunction* tf, F x) { … }
SI F apply_hlginv(const skcms_TransferFunction* tf, F x) { … }
template <typename T, typename P>
SI T load_3(const P* p) { … }
template <typename T, typename P>
SI T load_4(const P* p) { … }
template <typename T, typename P>
SI void store_3(P* p, const T& v) { … }
template <typename T, typename P>
SI void store_4(P* p, const T& v) { … }
SI U8 gather_8(const uint8_t* p, I32 ix) { … }
SI U16 gather_16(const uint8_t* p, I32 ix) { … }
SI U32 gather_32(const uint8_t* p, I32 ix) { … }
SI U32 gather_24(const uint8_t* p, I32 ix) { … }
// gather_48 is compiled only when __arm__ is not defined, so callers on that
// target must not reference it.
// It returns void and takes a U64* `v` — presumably the output destination; confirm
// against the body.
#if !defined(__arm__)
SI void gather_48(const uint8_t* p, I32 ix, U64* v) { … }
#endif
SI F F_from_U8(U8 v) { … }
SI F F_from_U16_BE(U16 v) { … }
SI U16 U16_from_F(F v) { … }
SI F minus_1_ulp(F v) { … }
SI F table(const skcms_Curve* curve, F v) { … }
SI void sample_clut_8(const uint8_t* grid_8, I32 ix, F* r, F* g, F* b) { … }
SI void sample_clut_8(const uint8_t* grid_8, I32 ix, F* r, F* g, F* b, F* a) { … }
SI void sample_clut_16(const uint8_t* grid_16, I32 ix, F* r, F* g, F* b) { … }
SI void sample_clut_16(const uint8_t* grid_16, I32 ix, F* r, F* g, F* b, F* a) { … }
// Core clut overload. Takes the input/output channel counts, the per-dimension
// grid sizes, both an 8-bit and a 16-bit grid pointer, and all four channels by pointer.
// Note: the `[4]` on grid_points is not enforced by the type system — an array
// parameter decays to `const uint8_t*` — so callers must supply at least four entries
// if the body indexes that far.
static void clut(uint32_t input_channels, uint32_t output_channels,
const uint8_t grid_points[4], const uint8_t* grid_8, const uint8_t* grid_16,
F* r, F* g, F* b, F* a) { … }
// clut overload driven by an skcms_A2B. r, g and b are passed by pointer, but alpha
// (`a`) is passed by value, so this overload cannot modify the caller's alpha.
static void clut(const skcms_A2B* a2b, F* r, F* g, F* b, F a) { … }
// clut overload driven by an skcms_B2A. All four channels, including alpha, are
// passed by pointer.
static void clut(const skcms_B2A* b2a, F* r, F* g, F* b, F* a) { … }
struct NoCtx { … };
struct Ctx { … };
#define STAGE_PARAMS(MAYBE_REF) …
// Stage-declaration macros come in two flavours, selected by SKCMS_HAS_MUSTTAIL.
// Both branches define the same three names (DECLARE_STAGE, STAGE, FINAL_STAGE), so
// the STAGE(...) / FINAL_STAGE(...) definitions that follow are written once and
// compile under either configuration.
#if SKCMS_HAS_MUSTTAIL
// StageList is forward-declared here and defined two lines below.
struct StageList;
StageFn;
struct StageList { … };
// In this branch DECLARE_STAGE takes a CALL_NEXT argument in addition to name and arg.
#define DECLARE_STAGE(name, arg, CALL_NEXT) …
#define STAGE(name, arg) …
#define FINAL_STAGE(name, arg) …
#else
#define DECLARE_STAGE …
#define STAGE …
#define FINAL_STAGE …
#endif
STAGE(load_a8, NoCtx) { … }
STAGE(load_g8, NoCtx) { … }
STAGE(load_ga88, NoCtx) { … }
STAGE(load_4444, NoCtx) { … }
STAGE(load_565, NoCtx) { … }
STAGE(load_888, NoCtx) { … }
STAGE(load_8888, NoCtx) { … }
STAGE(load_1010102, NoCtx) { … }
STAGE(load_101010x_XR, NoCtx) { … }
STAGE(load_10101010_XR, NoCtx) { … }
STAGE(load_161616LE, NoCtx) { … }
STAGE(load_16161616LE, NoCtx) { … }
STAGE(load_161616BE, NoCtx) { … }
STAGE(load_16161616BE, NoCtx) { … }
STAGE(load_hhh, NoCtx) { … }
STAGE(load_hhhh, NoCtx) { … }
STAGE(load_fff, NoCtx) { … }
STAGE(load_ffff, NoCtx) { … }
STAGE(swap_rb, NoCtx) { … }
STAGE(clamp, NoCtx) { … }
STAGE(invert, NoCtx) { … }
STAGE(force_opaque, NoCtx) { … }
STAGE(premul, NoCtx) { … }
STAGE(unpremul, NoCtx) { … }
STAGE(matrix_3x3, const skcms_Matrix3x3* matrix) { … }
STAGE(matrix_3x4, const skcms_Matrix3x4* matrix) { … }
STAGE(lab_to_xyz, NoCtx) { … }
STAGE(xyz_to_lab, NoCtx) { … }
STAGE(gamma_r, const skcms_TransferFunction* tf) { … }
STAGE(gamma_g, const skcms_TransferFunction* tf) { … }
STAGE(gamma_b, const skcms_TransferFunction* tf) { … }
STAGE(gamma_a, const skcms_TransferFunction* tf) { … }
STAGE(gamma_rgb, const skcms_TransferFunction* tf) { … }
STAGE(tf_r, const skcms_TransferFunction* tf) { … }
STAGE(tf_g, const skcms_TransferFunction* tf) { … }
STAGE(tf_b, const skcms_TransferFunction* tf) { … }
STAGE(tf_a, const skcms_TransferFunction* tf) { … }
STAGE(tf_rgb, const skcms_TransferFunction* tf) { … }
STAGE(pq_r, const skcms_TransferFunction* tf) { … }
STAGE(pq_g, const skcms_TransferFunction* tf) { … }
STAGE(pq_b, const skcms_TransferFunction* tf) { … }
STAGE(pq_a, const skcms_TransferFunction* tf) { … }
STAGE(pq_rgb, const skcms_TransferFunction* tf) { … }
STAGE(hlg_r, const skcms_TransferFunction* tf) { … }
STAGE(hlg_g, const skcms_TransferFunction* tf) { … }
STAGE(hlg_b, const skcms_TransferFunction* tf) { … }
STAGE(hlg_a, const skcms_TransferFunction* tf) { … }
STAGE(hlg_rgb, const skcms_TransferFunction* tf) { … }
STAGE(hlginv_r, const skcms_TransferFunction* tf) { … }
STAGE(hlginv_g, const skcms_TransferFunction* tf) { … }
STAGE(hlginv_b, const skcms_TransferFunction* tf) { … }
STAGE(hlginv_a, const skcms_TransferFunction* tf) { … }
STAGE(hlginv_rgb, const skcms_TransferFunction* tf) { … }
STAGE(table_r, const skcms_Curve* curve) { … }
STAGE(table_g, const skcms_Curve* curve) { … }
STAGE(table_b, const skcms_Curve* curve) { … }
STAGE(table_a, const skcms_Curve* curve) { … }
STAGE(clut_A2B, const skcms_A2B* a2b) { … }
STAGE(clut_B2A, const skcms_B2A* b2a) { … }
FINAL_STAGE(store_a8, NoCtx) { … }
FINAL_STAGE(store_g8, NoCtx) { … }
FINAL_STAGE(store_ga88, NoCtx) { … }
FINAL_STAGE(store_4444, NoCtx) { … }
FINAL_STAGE(store_565, NoCtx) { … }
FINAL_STAGE(store_888, NoCtx) { … }
FINAL_STAGE(store_8888, NoCtx) { … }
FINAL_STAGE(store_101010x_XR, NoCtx) { … }
FINAL_STAGE(store_1010102, NoCtx) { … }
FINAL_STAGE(store_161616LE, NoCtx) { … }
FINAL_STAGE(store_16161616LE, NoCtx) { … }
FINAL_STAGE(store_161616BE, NoCtx) { … }
FINAL_STAGE(store_16161616BE, NoCtx) { … }
FINAL_STAGE(store_hhh, NoCtx) { … }
FINAL_STAGE(store_hhhh, NoCtx) { … }
FINAL_STAGE(store_fff, NoCtx) { … }
FINAL_STAGE(store_ffff, NoCtx) { … }
#if SKCMS_HAS_MUSTTAIL
SI void exec_stages(StageFn* stages, const void** contexts, const char* src, char* dst, int i) { … }
#else
// Switch-based exec_stages, compiled when SKCMS_HAS_MUSTTAIL is false.
// Walks the `ops` array one entry at a time and dispatches each op through a switch
// whose cases are generated by the SKCMS_WORK_OPS and SKCMS_STORE_OPS lists.
static void exec_stages(const Op* ops, const void** contexts,
const char* src, char* dst, int i) {
// Initial pixel state: r, g and b start at F0, alpha at F1.
F r = F0, g = F0, b = F0, a = F1;
// The loop has no exit condition of its own.
// NOTE(review): presumably the cases generated from SKCMS_STORE_OPS return from the
// function — confirm against the M definitions.
while (true) {
// Read the current op, then advance to the next one.
switch (*ops++) {
// M is defined, applied to every entry of the work-op list, and undefined again...
#define M …
SKCMS_WORK_OPS(M)
#undef M
// ...then redefined with a different expansion for the store-op list.
#define M …
SKCMS_STORE_OPS(M)
#undef M
}
}
}
#endif
void run_program(const Op* program, const void** contexts, SKCMS_MAYBE_UNUSED ptrdiff_t programSize,
const char* src, char* dst, int n,
const size_t src_bpp, const size_t dst_bpp) { … }