// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H #define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H // IWYU pragma: private #include "./InternalHeaderCheck.h" namespace Eigen { namespace internal { // -------------------------------------------------------------------------- // // Forward declarations for templates defined below. template <typename Scalar, typename IndexType, int NumDims, int Layout> class TensorBlockIO; // -------------------------------------------------------------------------- // // Helper function to compute strides for densely stored buffer of given // dimensions. // TODO(ezhulenev): We compute strides 1000 times in different evaluators, use // this function instead everywhere. template <int Layout, typename IndexType, int NumDims> EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides(const DSizes<IndexType, NumDims>& dimensions) { … } template <int Layout, typename IndexType, size_t NumDims> EIGEN_ALWAYS_INLINE DSizes<IndexType, NumDims> strides(const Eigen::array<IndexType, NumDims>& dimensions) { … } template <int Layout, std::ptrdiff_t... Indices> EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(const Sizes<Indices...>& sizes) { … } // -------------------------------------------------------------------------- // // Tensor block shape type defines the shape preference for the blocks // extracted from the larger tensor. 
// // Example: blocks of 100 elements from the large 100x100 tensor: // - tensor: 100x100 // - target_block_size: 100 // // TensorBlockShapeType: // - kUniformAllDims: 100 blocks of size 10x10 // - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column // or row major layout) enum class TensorBlockShapeType { … }; struct TensorBlockResourceRequirements { … }; // -------------------------------------------------------------------------- // // TensorBlockDescriptor specifies a block offset within a tensor and the block // sizes along each of the tensor dimensions. template <int NumDims, typename IndexType = Eigen::Index> class TensorBlockDescriptor { … }; // -------------------------------------------------------------------------- // // TensorBlockMapper is responsible for iterating over the blocks of a tensor. template <int NumDims, int Layout, typename IndexType = Eigen::Index> class TensorBlockMapper { … }; // -------------------------------------------------------------------------- // // TensorBlockScratchAllocator is responsible for allocating temporary buffers // for block evaluation (output or input block materialization). Given that // Eigen expression traversal order is deterministic, all temporary allocations // are happening in the same order, and usually have exactly the same size. // Scratch allocator keeps track of all dynamic allocations, and after the // first block evaluation is completed, we should be able to reuse all the // temporary buffers for the next block evaluation. template <typename Device> class TensorBlockScratchAllocator { … }; // -------------------------------------------------------------------------- // // TensorBlockKind represents all possible block kinds that can be produced by // TensorEvaluator::evalBlock function. 
enum TensorBlockKind { … }; // -------------------------------------------------------------------------- // // TensorBlockNotImplemented should be used to define the TensorBlock typedef in // TensorEvaluators that do not support block evaluation. class TensorBlockNotImplemented { … }; // -------------------------------------------------------------------------- // // XprScalar extracts Scalar type from the Eigen expressions (if expression type // is not void). It's required to be able to define lazy block expression for // argument types that do not support block evaluation. template <typename XprType> struct XprScalar { … }; template <> struct XprScalar<void> { … }; // -------------------------------------------------------------------------- // // TensorMaterializedBlock is a fully evaluated block of the original tensor, // and XprType is just a TensorMap over the data. This block type is typically // used to materialize blocks of tensor expressions that can't be efficiently // represented as lazy Tensor expressions with fast coeff/packet operations, // e.g. we materialize all broadcasts into evaluated blocks. // // TensorMaterializedBlock does not own its memory buffer, it's either a memory // buffer that backs the original expression (e.g. block is just a view into a // Tensor), or a memory buffer allocated with scratch allocator, and in this // case the scratch allocator will deallocate it at the end of block based // expression execution. // // If the block was evaluated directly into the output buffer, and strides in // the output buffer do not match block strides, the TensorMap expression will // be invalid, and should never be used in block assignment or any other tensor // expression. 
template <typename Scalar, int NumDims, int Layout, typename IndexType = Eigen::Index> class TensorMaterializedBlock { … }; // -------------------------------------------------------------------------- // // TensorCwiseUnaryBlock is a lazy tensor expression block that applies UnaryOp // functor to the blocks produced by the underlying Tensor expression. template <typename UnaryOp, typename ArgTensorBlock> class TensorCwiseUnaryBlock { … }; // -------------------------------------------------------------------------- // // TensorCwiseBinaryBlock is a lazy tensor expression block that applies BinaryOp // functor to the blocks produced by the underlying Tensor expression. template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock> class TensorCwiseBinaryBlock { … }; // -------------------------------------------------------------------------- // // TensorUnaryExprBlock is a lazy tensor expression block that can construct // an arbitrary tensor expression from a block of the underlying type (this is a // generalization of the TensorCwiseUnaryBlock for arbitrary expressions). template <typename BlockFactory, typename ArgTensorBlock> class TensorUnaryExprBlock { … }; // -------------------------------------------------------------------------- // // TensorTernaryExprBlock is a lazy tensor expression block that can construct // an arbitrary tensor expression from three blocks of the underlying type. template <typename BlockFactory, typename Arg1TensorBlock, typename Arg2TensorBlock, typename Arg3TensorBlock> class TensorTernaryExprBlock { … }; // -------------------------------------------------------------------------- // // StridedLinearBufferCopy provides a method to copy data between two linear // buffers with different strides, with optimized paths for scatter/gather. 
template <typename Scalar, typename IndexType> class StridedLinearBufferCopy { … }; // -------------------------------------------------------------------------- // // TensorBlockIO copies data from the `src` tensor block to the `dst` tensor block. // It's possible to specify src->dst dimension mapping for the copy operation. // Dimensions of `dst` specify how many elements have to be copied, for the // `src` we only need to know the stride to navigate through source memory buffer. template <typename Scalar, typename IndexType, int NumDims, int Layout> class TensorBlockIO { … }; // -------------------------------------------------------------------------- // // TensorBlockAssignment assigns a block expression of type `TensorBlockExpr` to // a Tensor block defined by `desc`, backed by a memory buffer at `target`. // // Currently there is no way to write from a Tensor expression to a block of // memory, if dimensions are reordered. If you need to do that, you should // materialize a Tensor block expression into a memory buffer, and then use // TensorBlockIO to copy data between two memory buffers with a custom // `target->src` dimension map (see definition above). // // Also currently the innermost dimension of `target` must have a stride '1' // (contiguous in memory). This restriction could be lifted with a `pscatter`, // but in practice it's never needed, and there is a similar TensorBlockIO // workaround for that. // // TODO(ezhulenev): TensorBlockAssignment is a special case of TensorBlockIO // where `src` is a tensor expression. Explore if it is possible to rewrite IO // to use expressions instead of pointers, and after that TensorBlockAssignment // will become an alias to IO. 
template <typename Scalar, int NumDims, typename TensorBlockExpr, typename IndexType = Eigen::Index> class TensorBlockAssignment { … }; // -------------------------------------------------------------------------- // } // namespace internal } // namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H