#ifndef GEMMLOWP_INTERNAL_SIMD_WRAPPERS_H_
#define GEMMLOWP_INTERNAL_SIMD_WRAPPERS_H_
#include <algorithm>
#include <type_traits>
#include "../fixedpoint/fixedpoint.h"
namespace gemmlowp {
// Class template parameterized by a scalar type and a compile-time scalar
// count. The body is not visible in this view.
// NOTE(review): presumably selects the register type used to hold
// ScalarCount values of ScalarType -- confirm against the definition.
template <typename ScalarType, int ScalarCount>
struct RegisterType { … };
// Scalar helper taking two 32-bit signed integers and returning one.
// NOTE(review): body not visible here; presumably returns the smaller of
// a and b -- confirm.
inline std::int32_t Min(std::int32_t a, std::int32_t b) { … }
// Scalar helper taking two 32-bit signed integers and returning one.
// NOTE(review): body not visible here; presumably returns the larger of
// a and b -- confirm.
inline std::int32_t Max(std::int32_t a, std::int32_t b) { … }
// Scalar helper taking two 32-bit operands and a pointer to a 32-bit
// accumulator; returns nothing, so any result must be written through acc.
// NOTE(review): body not visible here; presumably performs
// *acc += lhs * rhs -- confirm, including overflow behaviour.
inline void MulAdd(std::int32_t lhs, std::int32_t rhs, std::int32_t* acc) { … }
// Class template parameterized by a scalar type and a scalar count.
// The body is not visible in this view.
// NOTE(review): presumably a flat buffer of tScalarCount scalars stored in
// one or more registers -- confirm against the definition.
template <typename tScalarType, int tScalarCount>
struct RegisterBuffer { … };
// Class template parameterized by a scalar type and compile-time row and
// column counts. The body is not visible in this view.
// NOTE(review): presumably a tRows x tCols block of scalars; storage order
// and members are not shown here -- confirm against the definition.
template <typename tScalarType, int tRows, int tCols>
struct RegisterBlock { … };
// Implementation struct templated on a register block type.
// NOTE(review): body not visible; by its name this presumably backs
// RegisterBlockAdd() and is the customization point for per-architecture
// specializations -- confirm.
template <typename RegisterBlockType>
struct RegisterBlockAddImpl { … };
// Takes two register blocks of the same type by const reference and returns
// a block of that same type by value.
// NOTE(review): body not visible; presumably returns the element-wise sum
// lhs + rhs -- confirm.
template <typename RegisterBlockType>
RegisterBlockType RegisterBlockAdd(const RegisterBlockType& lhs,
const RegisterBlockType& rhs) { … }
// Trait over a (LhsType, RhsType) pair. Its nested kValue is used as the
// default for the Flip parameter of FlipLhsRhs.
// NOTE(review): the rule that computes kValue is in the elided body --
// confirm there when documenting the flip criterion.
template <typename LhsType, typename RhsType>
struct ShouldFlipLhsRhs { … };
// Class template over a (LhsType, RhsType) pair plus a boolean Flip that
// defaults to ShouldFlipLhsRhs<LhsType, RhsType>::kValue.
// NOTE(review): body not visible; presumably exposes the two operands in
// either original or swapped order depending on Flip -- confirm.
template <typename LhsType, typename RhsType,
bool Flip = ShouldFlipLhsRhs<LhsType, RhsType>::kValue>
struct FlipLhsRhs { … };
// NOTE(review): the line below looks like the surviving head of a partial
// specialization for Flip == true; its template header and body are not
// visible in this view.
FlipLhsRhs<LhsType, RhsType, true>;
// Trait over the two operand types of a broadcasting binary operation.
// NOTE(review): body not visible; presumably computes the result shape
// (rows/cols) from the shapes of Lhs and Rhs -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastBinaryOpShape { … };
// Trait over the two operand types of a broadcasting binary operation.
// Its nested Type is the return type of the Broadcast* functions in this
// file that take (const Lhs&, const Rhs&).
// NOTE(review): how Type is derived is in the elided body -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastBinaryOpRegisterBlock { … };
// Implementation struct templated on the two operand types.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastAdd() -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastAddImpl { … };
// Takes two operands by const reference and returns
// BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type by value.
// NOTE(review): body not visible; presumably an element-wise addition with
// broadcasting of the smaller operand -- confirm.
template <typename Lhs, typename Rhs>
typename BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type BroadcastAdd(
const Lhs& lhs, const Rhs& rhs) { … }
// Implementation struct templated on the two operand types.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastShiftLeft() -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastShiftLeftImpl { … };
// Takes two operands by const reference and returns
// BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type by value.
// NOTE(review): body not visible; presumably shifts lhs left by the
// (broadcast) amounts in rhs -- confirm, including behaviour for
// out-of-range shift counts.
template <typename Lhs, typename Rhs>
typename BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type BroadcastShiftLeft(
const Lhs& lhs, const Rhs& rhs) { … }
// Implementation struct templated on the two operand types.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastSaturatingRoundingDoublingHighMul() -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastSaturatingRoundingDoublingHighMulImpl { … };
// Takes two operands by const reference and returns
// BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type by value.
// NOTE(review): body not visible; presumably applies the fixed-point
// saturating-rounding-doubling-high-multiply element-wise with
// broadcasting -- confirm against the fixedpoint library.
template <typename Lhs, typename Rhs>
typename BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type
BroadcastSaturatingRoundingDoublingHighMul(const Lhs& lhs, const Rhs& rhs) { … }
// Implementation struct templated on the two operand types.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastRoundingDivideByPOT() -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastRoundingDivideByPOTImpl { … };
// Takes two operands by const reference and returns
// BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type by value.
// NOTE(review): body not visible; presumably divides lhs by a power of two
// given (as exponents) in rhs, with rounding -- confirm the rounding mode.
template <typename Lhs, typename Rhs>
typename BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type
BroadcastRoundingDivideByPOT(const Lhs& lhs, const Rhs& rhs) { … }
// Implementation struct templated on the two operand types.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastMul() -- confirm.
template <typename Lhs, typename Rhs>
struct BroadcastMulImpl { … };
// Takes two operands by const reference and returns
// BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type by value.
// NOTE(review): body not visible; presumably an element-wise multiply with
// broadcasting -- confirm.
template <typename Lhs, typename Rhs>
typename BroadcastBinaryOpRegisterBlock<Lhs, Rhs>::Type BroadcastMul(
const Lhs& lhs, const Rhs& rhs) { … }
// Implementation struct templated on the two operand types and an
// accumulator type.
// NOTE(review): body not visible; by its name this presumably backs
// BroadcastMulAdd() -- confirm.
template <typename Lhs, typename Rhs, typename Acc>
struct BroadcastMulAddImpl { … };
// Takes two operands by const reference and a pointer to an accumulator;
// returns nothing, so any result must be written through acc.
// NOTE(review): body not visible; presumably accumulates the broadcast
// product lhs * rhs into *acc -- confirm.
template <typename Lhs, typename Rhs, typename Acc>
void BroadcastMulAdd(const Lhs& lhs, const Rhs& rhs, Acc* acc) { … }
// Implementation struct templated on the destination register block type
// and the source object type.
// NOTE(review): body not visible; by its name this presumably backs the
// Load() overloads -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
struct LoadImpl { … };
// NOTE(review): the three lines below look like the surviving heads of
// partial specializations (column-major MatrixMap, VectorMap and VectorDup
// sources); their template headers and bodies are not visible in this view.
LoadImpl<RegisterBlock<ScalarType, Rows, Cols>, MatrixMap<SrcScalarType, MapOrder::ColMajor>>;
LoadImpl<RegisterBlock<ScalarType, Rows, Cols>, VectorMap<SrcScalarType, Shape>>;
LoadImpl<RegisterBlock<ScalarType, Rows, Cols>, VectorDup<SrcScalarType, Shape>>;
// Two-coordinate overload: takes a source object and a (row, col) position
// and returns a RegisterBlockType by value. RegisterBlockType cannot be
// deduced from the arguments and must be given explicitly by the caller.
// NOTE(review): body not visible; presumably reads the block whose origin
// is (row, col) in src -- confirm, including any bounds requirements.
template <typename RegisterBlockType, typename SrcObjectType>
RegisterBlockType Load(const SrcObjectType& src, int row, int col) { … }
// Single-index overload: takes a source object and one position and
// returns a RegisterBlockType by value. RegisterBlockType cannot be deduced
// from the arguments and must be given explicitly by the caller.
// NOTE(review): body not visible; presumably intended for vector-like
// sources indexed by a single coordinate -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
RegisterBlockType Load(const SrcObjectType& src, int pos) { … }
// Implementation struct templated on the destination register block type.
// NOTE(review): body not visible; by its name this presumably backs
// LoadContiguous() -- confirm.
template <typename RegisterBlockType>
struct LoadContiguousImpl { … };
// Takes a pointer to const scalars of the block's own ScalarType and
// returns a RegisterBlockType by value. RegisterBlockType must be given
// explicitly by the caller.
// NOTE(review): body not visible; presumably reads the whole block from
// consecutive memory starting at src -- confirm the required element count
// and any alignment requirements.
template <typename RegisterBlockType>
RegisterBlockType LoadContiguous(
const typename RegisterBlockType::ScalarType* src) { … }
// Trait templated on two compile-time dimensions (BroadcastRows,
// BroadcastCols) and a source object type.
// NOTE(review): body not visible; presumably yields the shape of the block
// actually loaded from the source before broadcasting -- confirm.
template <int BroadcastRows, int BroadcastCols, typename SrcObjectType>
struct LoadForBroadcastingShape { … };
// NOTE(review): the two lines below look like the surviving heads of
// partial specializations for VectorMap and VectorDup sources; their
// template headers and bodies are not visible in this view.
LoadForBroadcastingShape<BroadcastRows, BroadcastCols, VectorMap<ScalarType, Shape>>;
LoadForBroadcastingShape<BroadcastRows, BroadcastCols, VectorDup<ScalarType, Shape>>;
// Trait templated on a register block type and a source object type. Its
// nested Type is the return type of the LoadForBroadcasting() overloads.
// NOTE(review): how Type is derived is in the elided body -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
struct LoadForBroadcastingRegisterBlock { … };
// Implementation struct templated on a register block type and a source
// object type.
// NOTE(review): body not visible; by its name this presumably backs the
// LoadForBroadcasting() overloads -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
struct LoadForBroadcastingImpl { … };
// NOTE(review): the two lines below look like the surviving heads of
// partial specializations for VectorMap and VectorDup sources; their
// template headers and bodies are not visible in this view.
LoadForBroadcastingImpl<RegisterBlock<ScalarType, Rows, Cols>, VectorMap<SrcScalarType, Shape>>;
LoadForBroadcastingImpl<RegisterBlock<ScalarType, Rows, Cols>, VectorDup<SrcScalarType, Shape>>;
// Two-coordinate overload: takes a source object and a (row, col) position
// and returns LoadForBroadcastingRegisterBlock<RegisterBlockType,
// SrcObjectType>::Type by value. RegisterBlockType must be given explicitly
// by the caller.
// NOTE(review): body not visible; presumably loads only the part of src
// that will later be broadcast against a RegisterBlockType-shaped operand
// -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
typename LoadForBroadcastingRegisterBlock<RegisterBlockType,
SrcObjectType>::Type
LoadForBroadcasting(const SrcObjectType& src, int row, int col) { … }
// Single-index overload: takes a source object and one position and
// returns LoadForBroadcastingRegisterBlock<RegisterBlockType,
// SrcObjectType>::Type by value. RegisterBlockType must be given explicitly
// by the caller.
// NOTE(review): body not visible; presumably the vector-source counterpart
// of the (row, col) overload -- confirm.
template <typename RegisterBlockType, typename SrcObjectType>
typename LoadForBroadcastingRegisterBlock<RegisterBlockType,
SrcObjectType>::Type
LoadForBroadcasting(const SrcObjectType& src, int pos) { … }
// Implementation struct templated on a compile-time integer constant and a
// register block type.
// NOTE(review): body not visible; by its name this presumably backs
// AddConstant() -- confirm.
template <int ConstantValue, typename RegisterBlockType>
struct AddConstantImpl { … };
// NOTE(review): the line below looks like the surviving head of a partial
// specialization for ConstantValue == 0 (presumably a no-op fast path);
// its template header and body are not visible in this view.
AddConstantImpl<0, RegisterBlockType>;
// Takes a pointer to a register block and returns nothing, so any effect
// must be applied to *block in place. ConstantValue is a template argument
// and must be given explicitly by the caller.
// NOTE(review): body not visible; presumably adds ConstantValue to every
// element of *block -- confirm.
template <int ConstantValue, typename RegisterBlockType>
void AddConstant(RegisterBlockType* block) { … }
// NOTE(review): the eight names below look like the surviving names of
// alias declarations; the aliased types and any template parameters are
// not visible in this view. By their names they presumably shorten
// RegisterBuffer / RegisterBlock for the int32, int16, uint8 and int8
// scalar types -- confirm against the full declarations.
RegBufferInt32;
RegBufferInt16;
RegBufferUint8;
RegBufferInt8;
RegBlockInt32;
RegBlockInt16;
RegBlockUint8;
RegBlockInt8;
}
#if defined GEMMLOWP_NEON
#include "simd_wrappers_neon.h"
#elif defined GEMMLOWP_SSE4
#include "simd_wrappers_sse.h"
#elif defined GEMMLOWP_MSA
#include "simd_wrappers_msa.h"
#endif
#endif