// kernel_avx2_fma.cc

/* Copyright 2019 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <cstdint>
#include <cstring>

#include "ruy/check_macros.h"
#include "ruy/kernel_common.h"
#include "ruy/kernel_x86.h"
#include "ruy/opt_set.h"
#include "ruy/platform.h"
#include "ruy/profiler/instrumentation.h"

#if RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM)
#include <immintrin.h>  // IWYU pragma: keep
#endif

namespace ruy {

#if !(RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM))

// Fallback definition compiled when RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM) is
// false. CPU-ID-based checks are expected to disable any path leading here,
// so being called at all is flagged as a bug via RUY_DCHECK.
void Kernel8bitAvx2(const KernelParams8bit<8, 8>& /*params*/) {
  RUY_DCHECK(false);
}

// Fallback definition compiled when RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM) is
// false. CPU-ID-based checks are expected to disable any path leading here,
// so being called at all is flagged as a bug via RUY_DCHECK.
void Kernel8bitAvx2SingleCol(const KernelParams8bit<8, 8>& /*params*/) {
  RUY_DCHECK(false);
}

// Fallback definition compiled when RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM) is
// false. CPU-ID-based checks are expected to disable any path leading here,
// so being called at all is flagged as a bug via RUY_DCHECK.
void KernelFloatAvx2(const KernelParamsFloat<8, 8>& /*params*/) {
  RUY_DCHECK(false);
}

// Fallback definition compiled when RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM) is
// false. CPU-ID-based checks are expected to disable any path leading here,
// so being called at all is flagged as a bug via RUY_DCHECK.
void KernelFloatAvx2SingleCol(const KernelParamsFloat<8, 8>& /*params*/) {
  RUY_DCHECK(false);
}

#else  // RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM)

// Compile-time sizing constants with internal linkage, defined only in the
// AVX2+FMA build of this file.
// NOTE(review): presumably the block edge and the inner (depth) step used by
// the 8-bit kernels below -- confirm against their uses.
static constexpr int kAvx8bitBlockSize = …;
static constexpr int kAvx8bitInnerSize = …;

// Translation-unit-local intrinsic helpers. Three of them are explicit
// specializations for Path::kAvx2Fma; the primary templates are not declared
// in this section (presumably they come from ruy/kernel_x86.h -- confirm).
namespace {
namespace intrin_utils {

// Path::kAvx2Fma specialization of the mm256_shuffle_epi8 helper.
// NOTE(review): presumably wraps the AVX2 byte-shuffle intrinsic of the same
// name, with `b` as the shuffle control -- confirm.
template <>
inline __m256i mm256_shuffle_epi8<Path::kAvx2Fma>(const __m256i& a,
                                                  const __m256i& b) { … }

// Make an inline function for FMA so we can share the float kernels
// with non-FMA code.
// NOTE(review): the roles of a, b and c (which two are multiplied, which is
// the addend) should be confirmed before relying on argument order.
template <>
inline __m256 MulAdd<Path::kAvx2Fma>(const __m256& a, const __m256& b,
                                     const __m256& c) { … }

// Path::kAvx2Fma specialization of the 128-bit extraction helper. Note that
// `imm` is an ordinary function argument here, not a template parameter.
// NOTE(review): presumably selects which 128-bit half of `a` is returned --
// confirm.
template <>
inline __m128i mm256_extracti128_si256<Path::kAvx2Fma>(const __m256i& a,
                                                       const int imm) { … }

// Plain (non-template, non-inline) helper, visible only in this file because
// of the enclosing anonymous namespace.
// NOTE(review): the name suggests a per-32-bit-element select between `a` and
// `b` driven by `mask` -- confirm which operand a set mask picks.
__m256i mm256_blendv_epi32(const __m256i& a, const __m256i& b,
                           const __m256i& mask) { … }

}  // namespace intrin_utils
}  // namespace

// 8-bit kernel implementation, templated on the Path. The NOLINT marker
// suppresses the function-size lint for this definition.
// NOTE(review): presumably the Path parameter selects the intrin_utils
// specializations used inside -- confirm.
template <Path path>
void Kernel8bitAvx2Impl(const KernelParams8bit<8, 8>& params) { … }  // NOLINT(readability/fn_size)

// Non-template 8-bit kernel entry point for builds with AVX2+FMA enabled.
// NOTE(review): expected to forward to Kernel8bitAvx2Impl with a concrete
// Path -- confirm.
void Kernel8bitAvx2(const KernelParams8bit<8, 8>& params) { … }

// Single-column variant of the 8-bit kernel implementation, templated on the
// Path. The NOLINT marker suppresses the function-size lint for this
// definition.
// NOTE(review): presumably handles the case of a single destination column --
// confirm against the caller.
template <Path path>
void Kernel8bitAvx2SingleColImpl(const KernelParams8bit<8, 8>& params) { … }  // NOLINT(readability/fn_size)

// Non-template single-column 8-bit kernel entry point for builds with
// AVX2+FMA enabled.
// NOTE(review): expected to forward to Kernel8bitAvx2SingleColImpl with a
// concrete Path -- confirm.
void Kernel8bitAvx2SingleCol(const KernelParams8bit<8, 8>& params) { … }

// Float kernel entry point for builds with AVX2+FMA enabled.
// NOTE(review): no float *Impl template is defined in this section;
// presumably this delegates to a shared implementation declared in a header,
// instantiated for Path::kAvx2Fma -- confirm.
void KernelFloatAvx2(const KernelParamsFloat<8, 8>& params) { … }

// Single-column float kernel entry point for builds with AVX2+FMA enabled.
// NOTE(review): presumably delegates to a shared single-column float
// implementation declared in a header -- confirm.
void KernelFloatAvx2SingleCol(const KernelParamsFloat<8, 8>& params) { … }

#endif  //  RUY_PLATFORM_AVX2_FMA && RUY_OPT(ASM)

}  // namespace ruy
// Source path: chromium/third_party/ruy/src/ruy/kernel_avx2_fma.cc