chromium/third_party/tflite/src/third_party/xla/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef XLA_TSL_FRAMEWORK_CONVOLUTION_EIGEN_SPATIAL_CONVOLUTIONS_INL_H_
#define XLA_TSL_FRAMEWORK_CONVOLUTION_EIGEN_SPATIAL_CONVOLUTIONS_INL_H_

#include "xla/tsl/framework/convolution/eigen_convolution_helpers.h"

// Note this header is used in both TF and TFLite.
namespace Eigen {

namespace internal {

#if !EIGEN_ALTIVEC_USE_CUSTOM_PACK
// WARNING: Most of the code here implicitly assumes that the matrix is in
// ColMajor layout. This is guaranteed by the tensor contraction (see
// TensorContraction.h).
//
// Inside Eigen a tensor contraction is represented by a matrix multiplication.
// We don't want to actually extract image patches and reshape the result into
// a matrix (this involves allocating huge extra memory), so the patch
// extraction and reshape operations are implicit.
//
// TensorContractionInputMapper takes a matrix index and returns the coefficient
// (or the packet) of the "virtual tensor", that would be at that index if we
// were to actually reshape the result of patch extraction.
//
// TensorContractionSubMapper provides a similar view into the "virtual matrix"
// at the given vertical and horizontal offsets.
//
// "Virtual matrix" dimensions:
//   *0: kernelChannels * kernelRows * kernelCols;
//    1: out_height * out_width; * OTHERS (e.g batches, etc...)
//
// *) extracted patches are continuous in memory (innermost dimension assuming
//    col major layout)
//
// With this dimensions:
//   row - offset within a single patch (in code: patchId)
//   col - index of the extracted patch (in code: patchIndex)
//         patchIndex ∈ [0..num_patches * OTHERS] (batch and other dimensions)
//
// TODO(ezhulenev): Consolidate this part of the code with the image patch
// extraction code since they are both very similar.

TensorContractionInputMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;

TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;

// Arrange a block of the right input matrix (in our case it's always a "virtual
// matrix" constructed from extracted image patches) in contiguous memory.
//
// Given column major input (A0 beside A1 in memory):
// A0 B0 C0 D0  E0 F0 G0 H0 ... Z0
// A1 B1 C1 D1  E1 F1 G1 H1 ... Z1
// A2 B2 C2 D2  E2 F2 G2 H2 ... Z2
// A3 B3 C3 D3  E3 F3 G3 H3 ... Z3
// A4 B4 C4 D4  E4 F4 G4 H4 ... Z4
// A5 B5 C5 D5  E5 F5 G5 H5 ... Z5
// A6 B6 C6 D6  E6 F6 G6 H6 ... Z6
// A7 B7 C7 D7  E7 F7 G7 H7 ... Z7
// A8 ...
// ...
//
// *) A, B, C, ... - patches extracted from the original input.
// *) A0, A1, A2 ... - values from the same patch at different offsets.
//
// The traversal (packed rhs memory) order (B0 besides A0 in memory):
// A0 B0 C0 D0 A1 B1 C1 D1 ...
// E0 F0 G0 H0 E1 F1 G1 H1 ...
// ...
// Z0 Z1 Z2 Z3 Z4 Z5 Z6 Z7 ... <- doesn't belong to any block (nr = 4)
//
// This traversal order must be the same as in default gemm_pack_rhs defined in
// GeneralBlockPanelKernel.h.
//
// *) nr - number of registers along the 'n' dimension.
//    See GeneralBlockPanelKernel.h and "Anatomy of High-Performance Matrix
//    Multiplication" paper.
gemm_pack_rhs<Scalar, Index, TensorContractionSubMapper<Scalar, Index, Rhs, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false>;

// Template specialization for packet_size = 2. We must special-case packet
// blocks with nr > packet_size, e.g. PacketBlock<Packet2d, 4>.
gemm_pack_rhs<Scalar, Index, TensorContractionSubMapper<Scalar, Index, Rhs, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>, nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false>;

// Special case for non-vectorized types such as float16.
gemm_pack_rhs<Scalar, Index, TensorContractionSubMapper<Scalar, Index, Rhs, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType>>, Device>, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false>;
#endif
}  // end namespace internal

/** SpatialConvolution
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies a 2D convolution over a multichannel input image.
 *
 * The input parameter is expected to be a tensor with a rank of 3 or more
 * (channels, height, width, and optionally others)
 * The kernel parameter is expected to be a 4D tensor (filters, channels,
 * kernel_height, kernel_width)
 * The input and the kernel must both be in col-major layout. The result will
 * also be in col-major layout.
 *
 * If col_in_stride, row_in_stride > 1, then applies convolution with holes
 * (aka atrous convolution), sampling every col_in_stride, row_in_stride input
 * pixels.
 *
 * If padding_top, padding_bottom, padding_left, or padding_right is specified,
 * then those paddings will be used to pad the input, and padding_type must be
 * PADDING_VALID.
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be filters, height, width (and
 * others if applicable).
 *
 * It is possible to swap the order of the width and height dimensions provided
 * that the same order is used in the input, the kernel, and the output.
 *
 * It is also possible to add an output kernel to the contraction, output
 * kernel is called by Eigen when it "finalizes" the block of an output tensor.
 *
 */
template <typename Input, typename Kernel,
          typename OutputKernel = const NoOpOutputKernel>
EIGEN_ALWAYS_INLINE static const std::conditional_t<
    internal::traits<Input>::Layout == ColMajor,
    TensorReshapingOp<
        const DSizes<typename internal::traits<Input>::Index,
                     internal::traits<Input>::NumDimensions>,
        const TensorContractionOp<
            const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
            const TensorReshapingOp<
                const DSizes<typename internal::traits<Input>::Index, 2>,
                const Kernel>,
            const TensorReshapingOp<
                const DSizes<typename internal::traits<Input>::Index, 2>,
                const TensorImagePatchOp<Dynamic, Dynamic, const Input> >,
            const OutputKernel> >,
    TensorReshapingOp<
        const DSizes<typename internal::traits<Input>::Index,
                     internal::traits<Input>::NumDimensions>,
        const TensorContractionOp<
            const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
            const TensorReshapingOp<
                const DSizes<typename internal::traits<Input>::Index, 2>,
                const TensorImagePatchOp<Dynamic, Dynamic, const Input> >,
            const TensorReshapingOp<
                const DSizes<typename internal::traits<Input>::Index, 2>,
                const Kernel>,
            const OutputKernel> > >
SpatialConvolution(const Input& input, const Kernel& kernel,
                   const Index row_stride = 1, const Index col_stride = 1,
                   const PaddingType padding_type = PADDING_SAME,
                   const Index row_in_stride = 1, const Index col_in_stride = 1,
                   const OutputKernel& output_kernel = OutputKernel(),
                   Index padding_top = 0, Index padding_bottom = 0,
                   Index padding_left = 0, Index padding_right = 0) {}

}  // end namespace Eigen

#endif  // XLA_TSL_FRAMEWORK_CONVOLUTION_EIGEN_SPATIAL_CONVOLUTIONS_INL_H_