// chromium/third_party/tflite/src/tensorflow/lite/kernels/internal/optimized/reduce.h

/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_REDUCE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_REDUCE_H_

#include <stdint.h>

#include <algorithm>
#include <limits>
#include <vector>

#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/kernels/cpu_backend_threadpool.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops_utils.h"
#include "tensorflow/lite/kernels/internal/optimized/reduce_utils.h"
#include "tensorflow/lite/kernels/internal/reduce_common.h"
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
#include "tensorflow/lite/kernels/internal/runtime_shape.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace optimized_ops {

// Computes the quantized (uint8) mean over the depth slice
// [start_depth, end_depth), presumably so the work can be split across
// threads (see MeanWorkerTask below) -- TODO confirm against upstream.
// `multiplier`/`shift`/`bias` look like fixed-point requantization
// parameters; verify against the caller.
// NOTE(review): the body is empty in this copy -- the implementation appears
// to have been stripped. Restore from the upstream TensorFlow Lite source.
inline void MeanImpl(const tflite::MeanParams& op_params,
                     const RuntimeShape& input_shape, const uint8_t* input_data,
                     int32 multiplier, int32 shift, int32 bias,
                     const RuntimeShape& output_shape, uint8_t* output_data,
                     int start_depth, int end_depth) {}

// Threadpool task, presumably running MeanImpl on one depth slice per worker
// -- TODO confirm. NOTE(review): all members are missing in this copy;
// without a Run() override of cpu_backend_threadpool::Task the struct is
// abstract and cannot be instantiated. Restore from upstream.
struct MeanWorkerTask : cpu_backend_threadpool::Task {};

// Quantized (uint8) Mean entry point: takes input/output zero points and
// scales, and a CpuBackendContext, presumably to fan work out over the
// threadpool via MeanWorkerTask -- TODO confirm.
// NOTE(review): the body is empty in this copy -- the implementation appears
// to have been stripped. Restore from the upstream TensorFlow Lite source.
inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& unextended_input_shape,
                 const uint8_t* input_data, int32 input_zero_point,
                 float input_scale, const RuntimeShape& unextended_output_shape,
                 uint8_t* output_data, int32 output_zero_point,
                 float output_scale, CpuBackendContext* cpu_backend_context) {}

// Sum reducer. The unary overload seeds an output element from its first
// input (see ReduceImpl's ReducerFirst/ReducerNext contract below); the
// binary overload accumulates each subsequent input.
template <typename T>
struct SumOp {
  inline T operator()(const T& a) const { return a; }
  inline T operator()(const T& a, const T& b) const { return a + b; }
};

// Sum reducer that accumulates inputs of type T into a wider accumulator
// type U (e.g. int8 inputs into an int32 sum, to avoid overflow).
// Unary overload seeds the accumulator; binary overload adds one more input.
template <typename T, typename U>
struct CastSumOp {
  inline U operator()(const T& a) const { return static_cast<U>(a); }
  inline U operator()(const U& a, const T& b) const {
    return a + static_cast<U>(b);
  }
};

// Product reducer: unary overload seeds an output element from its first
// input; binary overload multiplies in each subsequent input.
template <typename T>
struct ProdOp {
  inline T operator()(const T& a) const { return a; }
  inline T operator()(const T& a, const T& b) const { return a * b; }
};

// Max reducer: unary overload seeds an output element from its first input;
// binary overload keeps the larger of the running value and the next input.
template <typename T>
struct MaxOp {
  inline T operator()(const T& a) const { return a; }
  inline T operator()(const T& a, const T& b) const { return (a > b) ? a : b; }
};

// Min reducer: unary overload seeds an output element from its first input;
// binary overload keeps the smaller of the running value and the next input.
template <typename T>
struct MinOp {
  inline T operator()(const T& a) const { return a; }
  inline T operator()(const T& a, const T& b) const { return (a < b) ? a : b; }
};

// Logical-AND reducer (used for reduce_all over bool tensors): unary overload
// seeds an output element; binary overload ANDs in the next input.
struct AndOp {
  inline bool operator()(bool a) const { return a; }
  inline bool operator()(bool a, bool b) const { return a && b; }
};

// Logical-OR reducer (used for reduce_any over bool tensors): unary overload
// seeds an output element; binary overload ORs in the next input.
struct OrOp {
  inline bool operator()(bool a) const { return a; }
  inline bool operator()(bool a, bool b) const { return a || b; }
};

// When the number of axis is zero, the reduction is simply a copy.
//
// Copies all elements of `input_data` to `output_data`. The element count is
// the product of the first `input_num_dims` entries of `input_dims`
// (1 for a scalar, i.e. input_num_dims == 0).
template <typename T>
void ReduceIsCopy(const T* input_data, const int* input_dims,
                  const int input_num_dims, T* output_data) {
  int num_elems = 1;
  for (int i = 0; i < input_num_dims; ++i) {
    num_elems *= input_dims[i];
  }
  std::copy(input_data, input_data + num_elems, output_data);
}

// Reduces the input over either odd or even dimensions using Op.
// One recursive call for each dimension is made.
// 'depth' is the depth of recursion.
// 'parity' indicates whether odd or even dimensions are being reduced.
// ReducerFirst is applied to the first element to be written to each output
// position.
// ReducerNext is applied to each subsequent element to be written to each
// output position.
//
// Returns (presumably) the advanced input/output cursor pair for the caller's
// level of recursion -- TODO confirm against upstream.
// NOTE(review): the body is empty in this copy; a non-void function that
// falls off the end is undefined behavior. Restore the implementation from
// the upstream TensorFlow Lite source.
template <typename T, typename U, typename ReducerFirst, typename ReducerNext>
inline std::pair<const T*, U*> ReduceImpl(const T* input_data,
                                          const int* input_dims, U* output_data,
                                          int depth, int parity, bool next,
                                          const ReducerFirst& reducer_first,
                                          const ReducerNext& reducer_next) {}

// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
// This method iterates through input data and reduce elements along the
// dimensions given in axis. ReducerFirst is used the first time each output
// element is written and ReducerNext is used for all subsequent writes.
//
// Returns (presumably) true on success -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename In, typename Out, typename ReducerFirst,
          typename ReducerNext>
inline bool Reduce(const In* input_data, const int* input_dims,
                   const int input_num_dims, const int* axis,
                   const int num_axis, Out* output_data,
                   const ReducerFirst& reducer_first,
                   const ReducerNext& reducer_next) {}

// Computes the mean or sum of elements across dimensions given in axis.
// It does so in two stages, first calculates the sum of elements along the axis
// then divides it by the number of element in axis for quantized values.
//
// `temp_sum` is caller-provided scratch for the widened (type U) stage-one
// sums; `compute_sum` presumably selects sum vs. mean output -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T, typename U>
bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
                        float input_scale, const int* input_dims,
                        const int input_num_dims, T* output_data,
                        int32_t output_zero_point, float output_scale,
                        const int* output_dims, const int output_num_dims,
                        const int* axis, const int num_axis_dimensions,
                        bool keep_dims, int* normalized_dims,
                        int* resolved_axis, U* temp_sum, bool compute_sum) {}

ReduceType;

// Dispatches a reduction of numeric type T to the matching reducer
// (sum/prod/max/min) based on `reduce_type`, presumably returning false for
// unsupported combinations -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T>
inline bool ReduceDispatcher(const T* input_data, const int* input_dims,
                             const int input_num_dims, const int* output_dims,
                             int output_num_dims, T* output_data,
                             const int* axis, const int64_t num_axis_dimensions,
                             ReduceType reduce_type) {}

// Specialization for bool tensors: presumably dispatches to the AndOp/OrOp
// reducers for reduce_all / reduce_any -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <>
inline bool ReduceDispatcher<bool>(const bool* input_data,
                                   const int* input_dims,
                                   const int input_num_dims,
                                   const int* output_dims, int output_num_dims,
                                   bool* output_data, const int* axis,
                                   const int64_t num_axis_dimensions,
                                   ReduceType reduce_type) {}

// Calculate the reduced product by rescaling each multiplication step to
// avoid an overflow.
//
// Presumably the seeding functor for QuantizedReduceProd below (applied to
// the first element of each output position) -- TODO confirm.
// NOTE(review): members are missing in this copy; restore from upstream.
template <typename T>
struct ReducerFirst {};

// Presumably the accumulation functor for QuantizedReduceProd below (applied
// to each subsequent element, rescaling to avoid overflow) -- TODO confirm.
// NOTE(review): members are missing in this copy; restore from upstream.
template <typename T>
struct ReducerNext {};

// Quantized reduce_prod: `temp_prod` is caller-provided int32 scratch, and
// `scaling_multiplier`/`scaling_shift` look like fixed-point requantization
// parameters applied per multiplication step -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T>
inline bool QuantizedReduceProd(
    const T* input_data, int32_t input_zero_point,
    const RuntimeShape& input_shape, T* output_data, int32_t output_zero_point,
    const RuntimeShape& output_shape, const int* axis,
    const int64_t num_axis_dimensions, int* resolved_axis, int* normalized_dims,
    int32_t* temp_prod, int32_t scaling_multiplier, int scaling_shift) {}

// Mean over the axes carried in `op_params` for an arbitrary element type T.
// NOTE(review): the body is empty in this copy -- the implementation appears
// to have been stripped. Restore from the upstream TensorFlow Lite source.
template <typename T>
inline void Mean(const tflite::MeanParams& op_params,
                 const RuntimeShape& input_shape, const T* input_data,
                 const RuntimeShape& output_shape, T* output_data) {}

// Computes the mean of elements across dimensions given in axis.
// It does so in two stages, first calculates the sum of elements along the axis
// then divides it by the number of element in axis.
//
// `temp_sum` is caller-provided scratch of the widened accumulator type U.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T, typename U>
inline bool MeanGeneral(const T* input_data, const int* input_dims,
                        const int input_num_dims, T* output_data,
                        const int* output_dims, const int output_num_dims,
                        const int* axis, const int num_axis_dimensions,
                        bool keep_dims, int* normalized_dims,
                        int* resolved_axis, U* temp_sum) {}

// Generic Mean entry point; presumably forwards to MeanGeneral above --
// TODO confirm. See the float specialization below for the Eigen fast path.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T, typename U>
inline bool Mean(const T* input_data, const int* input_dims,
                 const int input_num_dims, T* output_data,
                 const int* output_dims, const int output_num_dims,
                 const int* axis, const int num_axis_dimensions, bool keep_dims,
                 int* normalized_dims, int* resolved_axis, U* temp_sum) {}

// Use Eigen when Mean is calculated over the last dimension only of a float
// tensor.
//
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <>
inline bool Mean<float, float>(const float* input_data, const int* input_dims,
                               const int input_num_dims, float* output_data,
                               const int* output_dims,
                               const int output_num_dims, const int* axis,
                               const int num_axis_dimensions, bool keep_dims,
                               int* normalized_dims, int* resolved_axis,
                               float* temp_sum) {}

// Computes the generic value (i.e., sum/max/min/prod) of elements across
// dimensions given in axis. It needs to pass in init_value and reducer.
//
// Presumably resolves/normalizes the axes and forwards to ReduceDispatcher
// -- TODO confirm.
// NOTE(review): the body is empty in this copy; a bool function that falls
// off the end is undefined behavior. Restore the implementation from the
// upstream TensorFlow Lite source.
template <typename T>
inline bool ReduceGeneric(const T* input_data, const int* input_dims,
                          const int input_num_dims, T* output_data,
                          const int* output_dims, const int output_num_dims,
                          const int* axis, const int64_t num_axis_dimensions,
                          int* resolved_axis, int* normalized_dims,
                          ReduceType reduce_type) {}

}  // namespace optimized_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_REDUCE_H_