// chromium/third_party/eigen3/src/Eigen/src/Core/AssignEvaluator.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <[email protected]>
// Copyright (C) 2011-2014 Gael Guennebaud <[email protected]>
// Copyright (C) 2011-2012 Jitse Niesen <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
 ***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits

// Decides, at compile time, the traversal and unrolling strategy for copying
// SrcEvaluator into DstEvaluator with AssignFunc. Body stripped in this copy;
// the strategy it computes is read below via Kernel::AssignmentTraits.
// MaxPacketSize == -1 presumably means "no packet-size restriction" —
// TODO(review): confirm against upstream Eigen.
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
struct copy_using_evaluator_traits {};

/***************************************************************************
 * Part 2 : meta-unrollers
 ***************************************************************************/

/************************
*** Default traversal ***
************************/

// Meta-unroller for the default (coefficient-by-coefficient) traversal.
// The primary template covers unrolled positions Index < Stop; bodies are
// stripped in this copy.
template <typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {};

// Termination of the compile-time recursion (Index == Stop). The stubbed
// line was a bare expression statement — ill-formed at namespace scope —
// so the partial-specialization head and empty body are restored here.
template <typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {};

// Meta-unroller for default traversal with inner-dimension unrolling only.
// Bodies are stripped in this copy.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {};

// Termination specialization (Index_ == Stop); head restored — the stubbed
// line was a bare, ill-formed expression at namespace scope.
template <typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {};

/***********************
*** Linear traversal ***
***********************/

// Meta-unroller for linear (flat-index) traversal. Bodies are stripped in
// this copy.
template <typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {};

// Termination specialization (Index == Stop); head restored — the stubbed
// line was a bare, ill-formed expression at namespace scope.
template <typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {};

/**************************
*** Inner vectorization ***
**************************/

// Meta-unroller for inner vectorization (packet-wise copies). Bodies are
// stripped in this copy.
template <typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling {};

// Termination specialization (Index == Stop); head restored — the stubbed
// line was a bare, ill-formed expression at namespace scope.
template <typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {};

// Inner-unrolled, inner-vectorized meta-unroller; carries the source and
// destination alignment as extra compile-time parameters. Bodies are
// stripped in this copy.
template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling {};

// Termination specialization (Index_ == Stop); head restored — the stubbed
// line referenced SrcAlignment/DstAlignment with no template head, which is
// ill-formed at namespace scope.
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {};

/***************************************************************************
 * Part 3 : implementation of all cases
 ***************************************************************************/

// dense_assignment_loop is based on assign_impl

// Primary declaration of the assignment loop; one partial specialization per
// (Traversal, Unrolling) combination follows in this section. The defaults
// are pulled from the kernel's compile-time AssignmentTraits, so callers can
// simply write dense_assignment_loop<Kernel>.
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
          int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
***** Special Cases *****
************************/

// Zero-sized assignment is a no-op.
dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>;

/************************
*** Default traversal ***
************************/

dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>;

dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>;

dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>;

/***************************
*** Linear vectorization ***
***************************/

// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
// of the non vectorizable beginning and ending parts

// Aligned case: nothing to do for the head/tail (everything is handled by
// the vectorized middle part). Body stripped in this copy.
template <bool IsAligned = false>
struct unaligned_dense_assignment_loop {};

// Unaligned case: presumably copies the leftover coefficients one by one in
// upstream Eigen — body stripped in this copy; TODO(review): confirm.
template <>
struct unaligned_dense_assignment_loop<false> {};

// Meta-unroller for the linear vectorized traversal. Bodies are stripped in
// this copy.
template <typename Kernel, int Index, int Stop>
struct copy_using_evaluator_linearvec_CompleteUnrolling {};

// Termination specialization (Index == Stop); head restored — the stubbed
// line was a bare, ill-formed expression at namespace scope.
template <typename Kernel, int Stop>
struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {};

dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>;

dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>;

/**************************
*** Inner vectorization ***
**************************/

dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>;

dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>;

dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>;

/***********************
*** Linear traversal ***
***********************/

dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>;

dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>;

/**************************
*** Slice vectorization ***
***************************/

dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>;

#if EIGEN_UNALIGNED_VECTORIZE
dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>;
#endif

/***************************************************************************
 * Part 4 : Generic dense assignment kernel
 ***************************************************************************/

// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, by-passing a functor.
// See the block comment above for the design rationale. Body stripped in
// this copy; upstream it exposes the per-coefficient / per-packet assignment
// operations that the dense_assignment_loop specializations drive —
// TODO(review): confirm against upstream Eigen.
template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel {};

// Special kernel used when computing small products whose operands have dynamic dimensions.  It ensures that the
// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
// when computing the product.

// Derives from the generic kernel with Version = BuiltIn; per the comment
// above, it caps the packet size (at 4) for small dynamic-size products.
// Body stripped in this copy.
template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
class restricted_packet_dense_assignment_kernel
    : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {};

/***************************************************************************
 * Part 5 : Entry point for dense rectangular assignment
 ***************************************************************************/

// Generic-functor overload of the pre-assignment resizing hook. Body
// stripped in this copy; presumably it does NOT resize (a compound functor
// such as += requires dst and src sizes to already match) —
// TODO(review): confirm against upstream Eigen.
template <typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
                                                             const Functor& /*func*/) {}

// Overload selected for a plain assignment functor (assign_op). Body
// stripped in this copy; presumably this is the overload that may actually
// resize dst to src's dimensions — TODO(review): confirm against upstream.
template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
                                                             const internal::assign_op<T1, T2>& /*func*/) {}

// Entry point for Part 5: evaluates dst/src and runs the appropriate
// dense_assignment_loop with the given functor. Body stripped in this copy.
template <typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment_loop(DstXprType& dst,
                                                                                      const SrcXprType& src,
                                                                                      const Functor& func) {}

// Specialization for filling the destination with a constant value.
#ifndef EIGEN_GPU_COMPILE_PHASE
// Overload selected when src is a CwiseNullaryOp of scalar_constant_op, i.e.
// the destination is being filled with a single constant value (see the
// comment above). Disabled during GPU device compilation via the surrounding
// #ifndef. Body stripped in this copy.
template <typename DstXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(
    DstXprType& dst,
    const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src,
    const internal::assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>& func) {}
#endif

// Convenience overload without a functor; presumably forwards to the
// three-argument form with a default assign_op — body stripped in this copy;
// TODO(review): confirm against upstream Eigen.
template <typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {}

/***************************************************************************
 * Part 6 : Generic assignment
 ***************************************************************************/

// Based on the respective shapes of the destination and source,
// the class AssignmentKind determine the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
// Primary declaration; the definition and the DenseShape specialization
// appear just below.
template <typename DstShape, typename SrcShape>
struct AssignmentKind;

// Assignment kind defined in this file:
// Tag types naming the two assignment mechanisms implemented in this file;
// used as the Kind typedef selected by AssignmentKind below.
struct Dense2Dense {};
struct EigenBase2EigenBase {};

template <typename, typename>
struct AssignmentKind {};
template <>
struct AssignmentKind<DenseShape, DenseShape> {};

// This is the main assignment class
// Declared here and specialized per Kind (Dense2Dense, EigenBase2EigenBase,
// and others elsewhere in Eigen). The trailing EnableIf parameter exists so
// specializations can use SFINAE without becoming ambiguous.
template <typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
                                                  typename evaluator_traits<SrcXprType>::Shape>::Kind,
          typename EnableIf = void>
struct Assignment;

// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
// transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite
// complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does
// not has to bother about these annoying details.

// Functor-less entry points; presumably forward to the functor-taking
// overloads below with a default assign functor — bodies stripped in this
// copy; TODO(review): confirm. The const-Dst overload accepts writable
// temporaries (e.g. Block expressions) passed as const.
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) {}
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {}

// Deal with "assume-aliasing"
template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment(
    Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {}

template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(
    Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {}

// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
// NoAlias wrapper overload: the user asserted no aliasing via .noalias(), so
// the "assume-aliasing" machinery is bypassed entirely (see comment above).
// Body stripped in this copy.
template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment(NoAlias<Dst, StorageBase>& dst,
                                                                           const Src& src, const Func& func) {}

// No-alias assignment with an explicit functor; upstream this also handles
// automatic transposition of vectors before dispatching to Assignment<>.
// Body stripped in this copy; TODO(review): confirm.
template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src,
                                                                                    const Func& func) {}

// Variant that uses restricted_packet_dense_assignment_kernel (PacketSize
// capped for small dynamic-size products — see Part 4). Body stripped in
// this copy.
template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
                                                                                      const Func& func) {}

// Functor-less convenience form; presumably forwards with a default assign
// functor. Body stripped in this copy; TODO(review): confirm.
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias(Dst& dst, const Src& src) {}

// Like call_assignment_no_alias but, per its name, without the automatic
// vector-transposition step. Bodies stripped in this copy; the two-argument
// form presumably supplies a default assign functor — TODO(review): confirm.
template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst,
                                                                                                 const Src& src,
                                                                                                 const Func& func) {}
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment_no_alias_no_transpose(Dst& dst,
                                                                                                 const Src& src) {}

// forward declaration
// Forward declaration only — defined elsewhere (runtime aliasing check used
// in debug builds).
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);

// Generic Dense to Dense assignment
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>;

// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>;

}  // namespace internal

}  // end namespace Eigen

#endif  // EIGEN_ASSIGN_EVALUATOR_H