reference_ops.h | Explore in Territory

/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_

#include <stdint.h>
#include <sys/types.h>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>
#include <limits>
#include <memory>
#include <type_traits>

#include "Eigen/Core"  // from @eigen_archive
#include "fixedpoint/fixedpoint.h"
#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/core/c/c_api_types.h"
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/add_n.h"
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/cast.h"
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/kernels/internal/reference/div.h"
#include "tensorflow/lite/kernels/internal/reference/elu.h"
#include "tensorflow/lite/kernels/internal/reference/exp.h"
#include "tensorflow/lite/kernels/internal/reference/fill.h"
#include "tensorflow/lite/kernels/internal/reference/floor.h"
#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/gather.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/lstm_cell.h"
#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/neg.h"
#include "tensorflow/lite/kernels/internal/reference/pad.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
#include "tensorflow/lite/kernels/internal/reference/round.h"
#include "tensorflow/lite/kernels/internal/reference/select.h"
#include "tensorflow/lite/kernels/internal/reference/slice.h"
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/reference/transpose.h"
#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
namespace tflite {

namespace reference_ops {

template <typename T>
inline void Relu(const RuntimeShape& input_shape, const T* input_data,
                 const RuntimeShape& output_shape, T* output_data) { … }

template <typename T>
inline void Relu0To1(const RuntimeShape& input_shape, const T* input_data,
                     const RuntimeShape& output_shape, T* output_data) { … }

template <typename T>
inline void Relu1(const RuntimeShape& input_shape, const T* input_data,
                  const RuntimeShape& output_shape, T* output_data) { … }

inline void Relu6(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) { … }

template <typename T>
inline void ReluX(const tflite::ReluParams& params,
                  const RuntimeShape& input_shape, const T* input_data,
                  const RuntimeShape& output_shape, T* output_data) { … }

template <typename T>
inline void ReluX(const tflite::ActivationParams& params,
                  const RuntimeShape& input_shape, const T* input_data,
                  const RuntimeShape& output_shape, T* output_data) { … }

// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
inline void BroadcastMulFivefold(const ArithmeticParams& unswitched_params,
                                 const RuntimeShape& unswitched_input1_shape,
                                 const uint8_t* unswitched_input1_data,
                                 const RuntimeShape& unswitched_input2_shape,
                                 const uint8_t* unswitched_input2_data,
                                 const RuntimeShape& output_shape,
                                 uint8_t* output_data) { … }

inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int16_t* input1_data,
                const RuntimeShape& input2_shape, const int16_t* input2_data,
                const RuntimeShape& output_shape, int16_t* output_data) { … }

inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int16_t* input1_data,
                const RuntimeShape& input2_shape, const int16_t* input2_data,
                const RuntimeShape& output_shape, uint8_t* output_data) { … }

inline void Sub16(const ArithmeticParams& params,
                  const RuntimeShape& input1_shape, const int16_t* input1_data,
                  const RuntimeShape& input2_shape, const int16_t* input2_data,
                  const RuntimeShape& output_shape, int16_t* output_data) { … }

template <typename Scalar>
void Pack(const PackParams& params, const RuntimeShape* const* input_shapes,
          const Scalar* const* input_data, const RuntimeShape& output_shape,
          Scalar* output_data) { … }

template <typename Scalar>
void Unpack(const UnpackParams& params, const RuntimeShape& input_shape,
            const Scalar* input_data, const RuntimeShape& output_shape,
            Scalar* const* output_datas) { … }

template <typename Scalar>
void PackWithScaling(const PackParams& params,
                     const RuntimeShape* const* input_shapes,
                     const uint8_t* const* input_data,
                     const RuntimeShape& output_shape, uint8_t* output_data) { … }

template <typename Scalar>
void DepthConcatenation(const ConcatenationParams& params,
                        const RuntimeShape* const* input_shapes,
                        const Scalar* const* input_data,
                        const RuntimeShape& output_shape, Scalar* output_data) { … }

template <typename Scalar>
void Split(const SplitParams& params, const RuntimeShape& input_shape,
           const Scalar* input_data, const RuntimeShape* const* output_shapes,
           Scalar* const* output_data) { … }

inline int NodeOffset(int b, int h, int w, int height, int width) { … }

inline void LocalResponseNormalization(
    const tflite::LocalResponseNormalizationParams& op_params,
    const RuntimeShape& input_shape, const float* input_data,
    const RuntimeShape& output_shape, float* output_data) { … }

inline void Dequantize(const RuntimeShape& input_shape,
                       const Eigen::half* input_data,
                       const RuntimeShape& output_shape, float* output_data) { … }

inline void FakeQuant(const tflite::FakeQuantParams& op_params,
                      const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) { … }

// Common subroutine for both `GatherNd` and `GatherNdString`.
struct GatherNdHelperResult { … };

// Returns common values being used on both `GatherNd` and `GatherNdString`.
inline GatherNdHelperResult GatherNdHelper(const RuntimeShape& params_shape,
                                           const RuntimeShape& indices_shape) { … }

// Implements GatherNd.
// Returns an error if any of the indices_data would cause an out of bounds
// memory read.
template <typename ParamsT, typename IndicesT = int32_t>
inline TfLiteStatus GatherNd(const RuntimeShape& params_shape,
                             const ParamsT* params_data,
                             const RuntimeShape& indices_shape,
                             const IndicesT* indices_data,
                             const RuntimeShape& output_shape,
                             ParamsT* output_data) { … }

#ifndef TF_LITE_STATIC_MEMORY
// Implements GatherNd on strings.
// Returns an error if any of the indices_data would cause an out of bounds
// memory read.
template <typename IndicesT = int32_t>
inline TfLiteStatus GatherNdString(const RuntimeShape& params_shape,
                                   const TfLiteTensor* params_data,
                                   const RuntimeShape& indices_shape,
                                   const IndicesT* indices_data,
                                   const RuntimeShape& output_shape,
                                   TfLiteTensor* output_data) { … }
#endif

template <typename IndicesT, typename UpdatesT>
inline TfLiteStatus ScatterNd(const RuntimeShape& indices_shape,
                              const IndicesT* indices_data,
                              const RuntimeShape& updates_shape,
                              const UpdatesT* updates_data,
                              const RuntimeShape& output_shape,
                              UpdatesT* output_data) { … }

template <typename T>
void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
             const T* input2_data, const RuntimeShape& output_shape,
             T* output_data) { … }

// Convenience version that allows, for example, generated-code calls to be
// the same as other binary ops.
template <typename T>
inline void Minimum(const RuntimeShape& input1_shape, const T* input1_data,
                    const RuntimeShape&, const T* input2_data,
                    const RuntimeShape& output_shape, T* output_data) { … }

template <typename T>
void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
             const T* input2_data, const RuntimeShape& output_shape,
             T* output_data) { … }

// Convenience version that allows, for example, generated-code calls to be
// the same as other binary ops.
template <typename T>
inline void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
                    const RuntimeShape&, const T* input2_data,
                    const RuntimeShape& output_shape, T* output_data) { … }

template <typename T1, typename T2, typename T3>
void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data,
            const T3* input2_data, const RuntimeShape& output_shape,
            T2* output_data) { … }

// Convenience version that allows, for example, generated-code calls to be
// the same as other binary ops.
template <typename T1, typename T2, typename T3>
inline void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data,
                   const RuntimeShape& input2_shape, const T3* input2_data,
                   const RuntimeShape& output_shape, T2* output_data) { … }

template <typename D, typename T>
void SelectTrueCoords(const RuntimeShape& input_condition_shape,
                      const D* input_condition_data, T* output_data) { … }

// For easy implementation, the indices is always a vector of size-4 vectors.
template <typename T, typename TI>
inline void SparseToDense(const std::vector<std::vector<TI>>& indices,
                          const T* values, T default_value,
                          bool value_is_scalar,
                          const RuntimeShape& unextended_output_shape,
                          T* output_data) { … }

template <typename T>
inline void Pow(const RuntimeShape& input1_shape, const T* input1_data,
                const RuntimeShape& input2_shape, const T* input2_data,
                const RuntimeShape& output_shape, T* output_data) { … }

template <typename T>
inline void BroadcastPow4DSlow(const RuntimeShape& unextended_input1_shape,
                               const T* input1_data,
                               const RuntimeShape& unextended_input2_shape,
                               const T* input2_data,
                               const RuntimeShape& unextended_output_shape,
                               T* output_data) { … }

template <typename Scalar>
void Reverse(std::array<int32_t, 8>& axes, int num_axes,
             const RuntimeShape& input_shape, const Scalar* input_data,
             Scalar* output_data) { … }

template <typename Scalar, typename TS>
void ReverseSequence(const TS* seq_lengths, const int seq_dim,
                     const int batch_dim, const RuntimeShape& input_shape,
                     const Scalar* input_data, const RuntimeShape& output_shape,
                     Scalar* output_data) { … }

template <typename T>
inline void SegmentSum(const RuntimeShape& input_shape, const T* input_data,
                       const RuntimeShape& segment_ids_shape,
                       const int32_t* segment_ids_data,
                       const RuntimeShape& output_shape, T* output_data) { … }

template <typename T, template <typename T2> typename Op>
inline void UnsortedSegmentRef(const RuntimeShape& input_shape,
                               const T* input_data,
                               const RuntimeShape& segment_ids_shape,
                               const int32_t* segment_ids_data,
                               const RuntimeShape& output_shape,
                               T* output_data) { … }

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REFERENCE_OPS_H_
chromium/third_party/tflite/src/tensorflow/lite/kernels/internal/reference/reference_ops.h