
// Copyright 2023 Google LLC
// SPDX-License-Identifier: Apache-2.0
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
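// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.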
// See the License for the specific language governing permissions and
// limitations under the License.

// Must be included inside an existing include guard, with the following ops
// already defined: BitCast, And, Set, ShiftLeft, ShiftRight, PromoteLowerTo,
// ConcatEven, ConcatOdd, plus the optional detail::PromoteEvenTo and
// detail::PromoteOddTo (if implemented in the target-specific header).

// This is normally set by set_macros-inl.h before this header is included;
// if not, we are viewing this header standalone. Reduce IDE errors by:
// NOTE(review): no matching #endif for this conditional is visible in this
// view, while the two namespaces opened below are closed unconditionally at
// the end of the file. Confirm that the conditional is terminated right after
// the dummy definitions and that the namespace close is guarded accordingly.
#if !defined(HWY_NAMESPACE)
// 1) Defining HWY_IDE so we get syntax highlighting rather than all-gray text.
#include "hwy/ops/shared-inl.h"
// 2) Entering the HWY_NAMESPACE to make definitions from shared-inl.h visible.
namespace hwy {
namespace HWY_NAMESPACE {
// 3) Providing a dummy VFromD (usually done by the target-specific header).
// Placeholder only: every descriptor maps to `int` so the signatures below
// can be parsed when this header is viewed on its own.
template <class D>
using VFromD = int;
// Placeholder lane type; the template argument is ignored.
template <class D>
using TFromV = int;
// Placeholder for the descriptor-of-vector helper; deliberately empty.
template <class D>
struct DFromV {};

// ------------------------------ Vec/Create/Get/Set2..4

// On SVE and RVV, Vec2..4 are aliases to built-in types. Also exclude the
// fixed-size SVE targets.

// NOTE: these are used inside arm_neon-inl.h, hence they cannot be defined in
// generic_ops-inl.h, which is included after that.
template <class D>
struct Vec2 {};

template <class D>
struct Vec3 {};

template <class D>
struct Vec4 {};

// D arg is unused but allows deducing D.
template <class D>
HWY_API Vec2<D> Create2(D /* tag */, VFromD<D> v0, VFromD<D> v1) {}

template <class D>
HWY_API Vec3<D> Create3(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2) {}

template <class D>
HWY_API Vec4<D> Create4(D /* tag */, VFromD<D> v0, VFromD<D> v1, VFromD<D> v2,
                        VFromD<D> v3) {}

template <size_t kIndex, class D>
HWY_API VFromD<D> Get2(Vec2<D> tuple) {}

template <size_t kIndex, class D>
HWY_API VFromD<D> Get3(Vec3<D> tuple) {}

template <size_t kIndex, class D>
HWY_API VFromD<D> Get4(Vec4<D> tuple) {}

template <size_t kIndex, class D>
HWY_API Vec2<D> Set2(Vec2<D> tuple, VFromD<D> val) {}

template <size_t kIndex, class D>
HWY_API Vec3<D> Set3(Vec3<D> tuple, VFromD<D> val) {}

template <size_t kIndex, class D>
HWY_API Vec4<D> Set4(Vec4<D> tuple, VFromD<D> val) {}


// ------------------------------ Rol/Ror (And, Or, Neg, Shl, Shr)
#if (defined(HWY_NATIVE_ROL_ROR_8) == defined(HWY_TARGET_TOGGLE))
// Flip the guard macro so that it stays in step with HWY_TARGET_TOGGLE.
// Without this, the comparison above would be false the next time this header
// is compiled for another target and the generic versions would be skipped.
#ifdef HWY_NATIVE_ROL_ROR_8
#undef HWY_NATIVE_ROL_ROR_8
#else
#define HWY_NATIVE_ROL_ROR_8
#endif

// Per-lane rotate left for 8-bit integer lanes: lane i of `a` is rotated by
// the low 3 bits of lane i of `b`.
template <class V, HWY_IF_UI8(TFromV<V>)>
HWY_API V Rol(V a, V b) {
  const DFromV<decltype(a)> d;
  const RebindToSigned<decltype(d)> di;
  const RebindToUnsigned<decltype(d)> du;

  // Only the low bits of the count matter; the complementary right-shift
  // count is (-b) mod 8, which is 0 when the left-shift count is 0.
  const auto shift_amt_mask = Set(du, uint8_t{7});
  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);

  // Shift as unsigned so that the right shift is logical even for signed V.
  const auto vu = BitCast(du, a);
  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
}

// Per-lane rotate right for 8-bit integer lanes: lane i of `a` is rotated by
// the low 3 bits of lane i of `b`.
template <class V, HWY_IF_UI8(TFromV<V>)>
HWY_API V Ror(V a, V b) {
  const DFromV<decltype(a)> d;
  const RebindToSigned<decltype(d)> di;
  const RebindToUnsigned<decltype(d)> du;

  const auto shift_amt_mask = Set(du, uint8_t{7});
  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);

  // Shift as unsigned so that the right shift is logical even for signed V.
  const auto vu = BitCast(du, a);
  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
}

#endif  // HWY_NATIVE_ROL_ROR_8

#if (defined(HWY_NATIVE_ROL_ROR_16) == defined(HWY_TARGET_TOGGLE))
// Flip the guard macro so that it stays in step with HWY_TARGET_TOGGLE.
// Without this, the comparison above would be false the next time this header
// is compiled for another target and the generic versions would be skipped.
#ifdef HWY_NATIVE_ROL_ROR_16
#undef HWY_NATIVE_ROL_ROR_16
#else
#define HWY_NATIVE_ROL_ROR_16
#endif

// Per-lane rotate left for 16-bit integer lanes: lane i of `a` is rotated by
// the low 4 bits of lane i of `b`.
template <class V, HWY_IF_UI16(TFromV<V>)>
HWY_API V Rol(V a, V b) {
  const DFromV<decltype(a)> d;
  const RebindToSigned<decltype(d)> di;
  const RebindToUnsigned<decltype(d)> du;

  // Only the low bits of the count matter; the complementary right-shift
  // count is (-b) mod 16, which is 0 when the left-shift count is 0.
  const auto shift_amt_mask = Set(du, uint16_t{15});
  const auto shl_amt = And(BitCast(du, b), shift_amt_mask);
  const auto shr_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);

  // Shift as unsigned so that the right shift is logical even for signed V.
  const auto vu = BitCast(du, a);
  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
}

// Per-lane rotate right for 16-bit integer lanes: lane i of `a` is rotated by
// the low 4 bits of lane i of `b`.
template <class V, HWY_IF_UI16(TFromV<V>)>
HWY_API V Ror(V a, V b) {
  const DFromV<decltype(a)> d;
  const RebindToSigned<decltype(d)> di;
  const RebindToUnsigned<decltype(d)> du;

  const auto shift_amt_mask = Set(du, uint16_t{15});
  const auto shr_amt = And(BitCast(du, b), shift_amt_mask);
  const auto shl_amt = And(BitCast(du, Neg(BitCast(di, b))), shift_amt_mask);

  // Shift as unsigned so that the right shift is logical even for signed V.
  const auto vu = BitCast(du, a);
  return BitCast(d, Or(Shl(vu, shl_amt), Shr(vu, shr_amt)));
}

#endif  // HWY_NATIVE_ROL_ROR_16

#if (defined(HWY_NATIVE_ROL_ROR_32_64) == defined(HWY_TARGET_TOGGLE))
#ifdef HWY_NATIVE_ROL_ROR_32_64
#undef HWY_NATIVE_ROL_ROR_32_64
#define HWY_NATIVE_ROL_ROR_32_64

template <class V, HWY_IF_UI32(TFromV<V>)>
HWY_API V Rol(V a, V b) {}

template <class V, HWY_IF_UI32(TFromV<V>)>
HWY_API V Ror(V a, V b) {}

template <class V, HWY_IF_UI64(TFromV<V>)>
HWY_API V Rol(V a, V b) {}

template <class V, HWY_IF_UI64(TFromV<V>)>
HWY_API V Ror(V a, V b) {}
#endif  // HWY_HAVE_INTEGER64

#endif  // HWY_NATIVE_ROL_ROR_32_64

// ------------------------------ RotateLeftSame/RotateRightSame

#if (defined(HWY_NATIVE_ROL_ROR_SAME_8) == defined(HWY_TARGET_TOGGLE))
// Flip the guard macro so that it stays in step with HWY_TARGET_TOGGLE;
// otherwise this section would be skipped on the next target pass.
#ifdef HWY_NATIVE_ROL_ROR_SAME_8
#undef HWY_NATIVE_ROL_ROR_SAME_8
#else
#define HWY_NATIVE_ROL_ROR_SAME_8
#endif

// Rotates every 8-bit lane of `v` left by the same run-time count. Only the
// low 3 bits of `bits` are used.
template <class V, HWY_IF_UI8(TFromV<V>)>
HWY_API V RotateLeftSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  // The negation is done in unsigned arithmetic to avoid signed overflow.
  const int shl_amt = bits & 7;
  const int shr_amt = static_cast<int>((0u - static_cast<unsigned>(bits)) & 7u);

  // Shift as unsigned so that the right shift is logical even for signed V.
  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

// Rotates every 8-bit lane of `v` right by the same run-time count. Only the
// low 3 bits of `bits` are used.
template <class V, HWY_IF_UI8(TFromV<V>)>
HWY_API V RotateRightSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shr_amt = bits & 7;
  const int shl_amt = static_cast<int>((0u - static_cast<unsigned>(bits)) & 7u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

#endif  // HWY_NATIVE_ROL_ROR_SAME_8

#if (defined(HWY_NATIVE_ROL_ROR_SAME_16) == defined(HWY_TARGET_TOGGLE))
#ifdef HWY_NATIVE_ROL_ROR_SAME_16
#undef HWY_NATIVE_ROL_ROR_SAME_16
#else
#define HWY_NATIVE_ROL_ROR_SAME_16
#endif

// Rotates every 16-bit lane of `v` left by the same run-time count. Only the
// low 4 bits of `bits` are used.
template <class V, HWY_IF_UI16(TFromV<V>)>
HWY_API V RotateLeftSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shl_amt = bits & 15;
  const int shr_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 15u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

// Rotates every 16-bit lane of `v` right by the same run-time count. Only the
// low 4 bits of `bits` are used.
template <class V, HWY_IF_UI16(TFromV<V>)>
HWY_API V RotateRightSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shr_amt = bits & 15;
  const int shl_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 15u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

#endif  // HWY_NATIVE_ROL_ROR_SAME_16

#if (defined(HWY_NATIVE_ROL_ROR_SAME_32_64) == defined(HWY_TARGET_TOGGLE))
#ifdef HWY_NATIVE_ROL_ROR_SAME_32_64
#undef HWY_NATIVE_ROL_ROR_SAME_32_64
#else
#define HWY_NATIVE_ROL_ROR_SAME_32_64
#endif

// Rotates every 32-bit lane of `v` left by the same run-time count. Only the
// low 5 bits of `bits` are used.
template <class V, HWY_IF_UI32(TFromV<V>)>
HWY_API V RotateLeftSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shl_amt = bits & 31;
  const int shr_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 31u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

// Rotates every 32-bit lane of `v` right by the same run-time count. Only the
// low 5 bits of `bits` are used.
template <class V, HWY_IF_UI32(TFromV<V>)>
HWY_API V RotateRightSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shr_amt = bits & 31;
  const int shl_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 31u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

// The 64-bit overloads are only provided when 64-bit integer lanes exist.
#if HWY_HAVE_INTEGER64
// Rotates every 64-bit lane of `v` left by the same run-time count. Only the
// low 6 bits of `bits` are used.
template <class V, HWY_IF_UI64(TFromV<V>)>
HWY_API V RotateLeftSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shl_amt = bits & 63;
  const int shr_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 63u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}

// Rotates every 64-bit lane of `v` right by the same run-time count. Only the
// low 6 bits of `bits` are used.
template <class V, HWY_IF_UI64(TFromV<V>)>
HWY_API V RotateRightSame(V v, int bits) {
  const DFromV<decltype(v)> d;
  const RebindToUnsigned<decltype(d)> du;

  const int shr_amt = bits & 63;
  const int shl_amt =
      static_cast<int>((0u - static_cast<unsigned>(bits)) & 63u);

  const auto vu = BitCast(du, v);
  return BitCast(d,
                 Or(ShiftLeftSame(vu, shl_amt), ShiftRightSame(vu, shr_amt)));
}
#endif  // HWY_HAVE_INTEGER64

#endif  // HWY_NATIVE_ROL_ROR_SAME_32_64

// ------------------------------ PromoteEvenTo/PromoteOddTo

// These are used by target-specific headers for ReorderWidenMulAccumulate etc.

namespace detail {

// NOTE(review): every overload in this namespace has an empty body in this
// view although each is declared to return VFromD<...>. Flowing off the end of
// a non-void function is undefined behavior if the function is called.
// Presumably the implementations were elided - confirm before relying on them.

// Tag dispatch is used in detail::PromoteEvenTo and detail::PromoteOddTo as
// there are target-specific specializations for some of the
// detail::PromoteEvenTo and detail::PromoteOddTo cases on some targets.

// All targets except HWY_SCALAR use the implementations of
// detail::PromoteEvenTo and detail::PromoteOddTo in generic_ops-inl.h for at
// least some of the PromoteEvenTo and PromoteOddTo cases.

// Overload parameters, common to all functions below:
//   1st tag: category of the destination lane type,
//   2nd tag: hwy::SizeTag carrying the destination lane size (kToLaneSize),
//   3rd tag: category of the source lane type,
//   d_to:    descriptor of the result vector,
//   v:       source vector.

// Signed to signed PromoteEvenTo/PromoteOddTo
template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteEvenTo(
    hwy::SignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::SignedTag /*from_type_tag*/, D d_to, V v) {}

// Unsigned to unsigned PromoteEvenTo/PromoteOddTo
template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteEvenTo(
    hwy::UnsignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {}

// Signed to signed PromoteOddTo
template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteOddTo(
    hwy::SignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::SignedTag /*from_type_tag*/, D d_to, V v) {}

// Unsigned to unsigned PromoteOddTo
template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteOddTo(
    hwy::UnsignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {}

// Unsigned to signed: Same as unsigned->unsigned PromoteEvenTo/PromoteOddTo
// followed by BitCast to signed
template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteEvenTo(
    hwy::SignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {}

template <size_t kToLaneSize, class D, class V>
HWY_INLINE VFromD<D> PromoteOddTo(
    hwy::SignedTag /*to_type_tag*/,
    hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    hwy::UnsignedTag /*from_type_tag*/, D d_to, V v) {}

// BF16->F32 PromoteEvenTo

// NOTE: It is possible for FromTypeTag to be hwy::SignedTag or hwy::UnsignedTag
// instead of hwy::FloatTag on targets that use scalable vectors.

// VBF16 is considered to be a bfloat16_t vector if TFromV<VBF16> is the same
// type as TFromV<VFromD<Repartition<bfloat16_t, DF32>>>

// The BF16->F32 PromoteEvenTo overload is only enabled if VBF16 is considered
// to be a bfloat16_t vector.
template <class FromTypeTag, class DF32, class VBF16,
          class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
          hwy::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()>* = nullptr>
HWY_INLINE VFromD<DF32> PromoteEvenTo(hwy::FloatTag /*to_type_tag*/,
                                      hwy::SizeTag<4> /*to_lane_size_tag*/,
                                      FromTypeTag /*from_type_tag*/, DF32 d_to,
                                      VBF16 v) {}

// BF16->F32 PromoteOddTo

// NOTE: It is possible for FromTypeTag to be hwy::SignedTag or hwy::UnsignedTag
// instead of hwy::FloatTag on targets that use scalable vectors.

// VBF16 is considered to be a bfloat16_t vector if TFromV<VBF16> is the same
// type as TFromV<VFromD<Repartition<bfloat16_t, DF32>>>

// The BF16->F32 PromoteOddTo overload is only enabled if VBF16 is considered
// to be a bfloat16_t vector.
template <class FromTypeTag, class DF32, class VBF16,
          class VBF16_2 = VFromD<Repartition<bfloat16_t, DF32>>,
          hwy::EnableIf<IsSame<TFromV<VBF16>, TFromV<VBF16_2>>()>* = nullptr>
HWY_INLINE VFromD<DF32> PromoteOddTo(hwy::FloatTag /*to_type_tag*/,
                                     hwy::SizeTag<4> /*to_lane_size_tag*/,
                                     FromTypeTag /*from_type_tag*/, DF32 d_to,
                                     VBF16 v) {}

// Default PromoteEvenTo/PromoteOddTo implementations
// These accept any combination of tags and therefore act as the fallback when
// none of the more specialized overloads above is a better match.

// Fallback PromoteEvenTo for result descriptors with exactly one lane.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
          class V, HWY_IF_LANES_D(D, 1)>
HWY_INLINE VFromD<D> PromoteEvenTo(
    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    FromTypeTag /*from_type_tag*/, D d_to, V v) {}

// Fallback PromoteEvenTo for result descriptors with more than one lane.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
          class V, HWY_IF_LANES_GT_D(D, 1)>
HWY_INLINE VFromD<D> PromoteEvenTo(
    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    FromTypeTag /*from_type_tag*/, D d_to, V v) {}

// Fallback PromoteOddTo; unlike PromoteEvenTo it is not split by lane count.
template <class ToTypeTag, size_t kToLaneSize, class FromTypeTag, class D,
          class V>
HWY_INLINE VFromD<D> PromoteOddTo(
    ToTypeTag /*to_type_tag*/, hwy::SizeTag<kToLaneSize> /*to_lane_size_tag*/,
    FromTypeTag /*from_type_tag*/, D d_to, V v) {}

}  // namespace detail

// Public entry point: promotes the even-indexed lanes of `v` to the lane type
// of `d`, which must be exactly twice as wide as the lanes of `v` (enforced by
// the HWY_IF_T_SIZE_D constraint). The work is done by the detail::
// overloads, selected via tag dispatch on the destination type category, the
// destination lane size and the source type category.
template <class D, class V, HWY_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
          class V2 = VFromD<Repartition<TFromV<V>, D>>,
          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_V(V2))>
HWY_API VFromD<D> PromoteEvenTo(D d, V v) {
  return detail::PromoteEvenTo(hwy::TypeTag<TFromD<D>>(),
                               hwy::SizeTag<sizeof(TFromD<D>)>(),
                               hwy::TypeTag<TFromV<V>>(), d, v);
}

// Public entry point: promotes the odd-indexed lanes of `v` to the lane type
// of `d`, which must be exactly twice as wide as the lanes of `v` (enforced by
// the HWY_IF_T_SIZE_D constraint). The work is done by the detail::
// overloads, selected via tag dispatch on the destination type category, the
// destination lane size and the source type category.
template <class D, class V, HWY_IF_T_SIZE_D(D, 2 * sizeof(TFromV<V>)),
          class V2 = VFromD<Repartition<TFromV<V>, D>>,
          HWY_IF_LANES_D(DFromV<V>, HWY_MAX_LANES_V(V2))>
HWY_API VFromD<D> PromoteOddTo(D d, V v) {
  return detail::PromoteOddTo(hwy::TypeTag<TFromD<D>>(),
                              hwy::SizeTag<sizeof(TFromD<D>)>(),
                              hwy::TypeTag<TFromV<V>>(), d, v);
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace hwy