chromium/third_party/skia/src/opts/SkRasterPipeline_opts.h

/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkRasterPipeline_opts_DEFINED
#define SkRasterPipeline_opts_DEFINED

#include "include/core/SkTypes.h"
#include "include/private/base/SkMalloc.h"
#include "include/private/base/SkSpan_impl.h"
#include "include/private/base/SkTemplates.h"
#include "modules/skcms/skcms.h"
#include "src/base/SkUtils.h"  // unaligned_{load,store}
#include "src/core/SkRasterPipeline.h"
#include "src/core/SkRasterPipelineContextUtils.h"
#include "src/shaders/SkPerlinNoiseShaderType.h"
#include "src/sksl/tracing/SkSLTraceHook.h"

#include <cstdint>
#include <type_traits>

// Every function in this file should be marked static and inline using SI.
#if defined(__clang__) || defined(__GNUC__)
    #define SI __attribute__((always_inline)) static inline
#else
    #define SI static inline
#endif
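
// For illustration, a hypothetical helper written in this style would look like:
//
//     SI float example_lerp(float a, float b, float t) { return a + (b - a)*t; }
//
// Under Clang/GCC this expands to an always-inlined static function; elsewhere it is a plain
// static inline function.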

#if defined(__clang__)
    #define SK_UNROLL _Pragma("unroll")
#else
    #define SK_UNROLL
#endif
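
// For illustration, SK_UNROLL goes immediately before a loop to ask Clang to fully unroll it;
// other compilers see an empty token and compile the loop normally. A hypothetical use:
//
//     SK_UNROLL for (int i = 0; i < 4; i++) { sum += v[i]; }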

#if defined(__clang__)
    template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
#elif defined(__GNUC__)
    // Unfortunately, GCC does not allow us to omit the struct. This will not compile:
    //   template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
    template <int N, typename T> struct VecHelper {
        typedef T __attribute__((vector_size(N * sizeof(T)))) V;
    };
    template <int N, typename T> using Vec = typename VecHelper<N, T>::V;
#endif
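
// For illustration, either definition yields a fixed-width vector type with elementwise
// arithmetic. A hypothetical sketch:
//
//     using F4 = Vec<4, float>;
//     F4 x = {1, 2, 3, 4};
//     F4 y = x * x + 1;   // elementwise: {2, 5, 10, 17}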

template <typename Dst, typename Src>
SI Dst widen_cast(const Src& src) {
    static_assert(sizeof(Dst) > sizeof(Src));
    static_assert(std::is_trivially_copyable<Dst>::value);
    static_assert(std::is_trivially_copyable<Src>::value);
    Dst dst;
    memcpy(&dst, &src, sizeof(Src));
    return dst;
}
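
// For illustration, widen_cast copies a smaller value into the leading bytes of a larger one,
// leaving the remaining bytes uninitialized. A hypothetical sketch:
//
//     uint32_t px   = 0xffeeddcc;
//     uint64_t wide = widen_cast<uint64_t>(px);   // first 4 bytes hold px's representation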

// A stage's context is stored as a type-erased pointer on the stage; Ctx wraps that pointer and
// converts it back to the concrete context type the stage expects.
struct Ctx {
    SkRasterPipelineStage* fStage;

    template <typename T>
    operator T*() {
        return (T*)fStage->ctx;
    }
};

using NoCtx = const void*;
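
// For illustration, a stage with a hypothetical context struct could recover its typed pointer
// from the current stage (here `stage` stands for an SkRasterPipelineStage*) without a cast:
//
//     struct ExampleCtx { float scale; };
//     ExampleCtx* c = Ctx{stage};   // implicit conversion via operator T*()
//
// Stages that need no context use NoCtx instead.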

#if defined(JUMPER_IS_SCALAR) || defined(JUMPER_IS_NEON) || defined(JUMPER_IS_HSW) || \
        defined(JUMPER_IS_SKX) || defined(JUMPER_IS_AVX) || defined(JUMPER_IS_SSE41) || \
        defined(JUMPER_IS_SSE2)
    // Honor the existing setting
#elif !defined(__clang__) && !defined(__GNUC__)
    #define JUMPER_IS_SCALAR
#elif defined(SK_ARM_HAS_NEON)
    #define JUMPER_IS_NEON
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SKX
    #define JUMPER_IS_SKX
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    #define JUMPER_IS_HSW
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
    #define JUMPER_IS_AVX
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    #define JUMPER_IS_SSE41
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    #define JUMPER_IS_SSE2
#elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LASX
    #define JUMPER_IS_LASX
#elif SK_CPU_LSX_LEVEL >= SK_CPU_LSX_LEVEL_LSX
    #define JUMPER_IS_LSX
#else
    #define JUMPER_IS_SCALAR
#endif
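
// For illustration, a build can pin the instruction set by defining one of the JUMPER_IS_* macros
// ahead of time (the chain above honors an existing setting); e.g. passing -DJUMPER_IS_SCALAR to
// the compiler forces the portable, non-SIMD implementation.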

#if defined(JUMPER_IS_SCALAR)
    #include <math.h>
#elif defined(JUMPER_IS_NEON)
    #include <arm_neon.h>
#elif defined(JUMPER_IS_LASX)
    #include <lasxintrin.h>
    #include <lsxintrin.h>
#elif defined(JUMPER_IS_LSX)
    #include <lsxintrin.h>
#else
    #include <immintrin.h>
#endif

// Notes:
// * rcp_fast and rcp_precise both produce a reciprocal, but rcp_fast is an estimate with at least
//   12 bits of precision while rcp_precise should be accurate to full float precision. On ARM,
//   rcp_precise needs two Newton-Raphson refinement steps because the hardware estimate has only
//   8 bits of precision; on Intel one refinement step suffices because its estimate already has
//   12 bits.
//
// * Don't call rcp_approx or rsqrt_approx directly; only use rcp_fast and rsqrt.
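
// For illustration, one Newton-Raphson refinement step for a reciprocal improves an estimate e of
// 1/a via
//
//     e' = e * (2 - a*e)
//
// which roughly doubles the number of correct bits: two steps take ARM's ~8-bit estimate past the
// 24-bit float mantissa, while one step is enough for Intel's ~12-bit estimate.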

}  // namespace SK_OPTS_NS

#undef SI

#endif//SkRasterPipeline_opts_DEFINED