#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_HYBRID_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_HYBRID_H_

#include <algorithm>
#include <memory>

#include "ruy/profiler/instrumentation.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/cpu_backend_threadpool.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h"
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid_3x3_filter.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace optimized_integer_ops {
namespace depthwise_conv {
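
// Zero-initializes the int32 accumulator buffer covering `num_output_pixels`
// output pixels of `output_depth` channels each. In the hybrid path the float
// bias is only added after dequantization, so the accumulators start at zero
// rather than at the bias value.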
inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth,
                                       int32* acc_buffer) { … }
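
// General (not 3x3-specialized) hybrid worker. Accumulates int8 input x int8
// filter products into the caller-provided `acc_buffer`, then converts each
// accumulator back to float using the per-batch input scale/offset and the
// per-channel filter scale, adds the float bias, and clamps to the float
// activation range. Only the [thread_start, thread_end) slice along
// `thread_dim` is processed, so several worker threads can share one
// convolution.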
static void DoDepthwiseConvHybridGeneral(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, const int32_t* input_offsets,
int thread_start, int thread_end, int thread_dim, int32* acc_buffer,
int32 acc_buffer_size) { … }
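
// Same as DoDepthwiseConvHybridGeneral, but runs on a fixed-size accumulator
// buffer owned by the function itself instead of a caller-provided one.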
static void DoDepthwiseConvHybridGeneralStatic(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, const int32_t* input_offsets,
int thread_start, int thread_end, int thread_dim) { … }
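
// Entry point of the general hybrid path. Picks an accumulator-buffer
// strategy (the fixed-size buffer when it is large enough, otherwise a
// heap-allocated one) and forwards to the workers above.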
inline void DepthwiseConvHybridGeneral(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, const int32_t* input_offsets,
int thread_start, int thread_end, int thread_dim) { … }
}  // namespace depthwise_conv
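
// Dispatches one thread's slice of work: uses the specialized 3x3-filter
// kernel from depthwise_conv_hybrid_3x3_filter.h when the filter shape,
// strides, padding and target CPU allow it, and otherwise falls back to
// depthwise_conv::DepthwiseConvHybridGeneral. `kOutputRounding` selects the
// rounding mode used by the specialized kernel.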
template <DepthwiseConvOutputRounding kOutputRounding>
inline void DepthwiseConvHybridWithRounding(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, const int32_t* input_offsets,
int thread_start, int thread_end, int thread_dim) { … }
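
// Fixes the output rounding mode appropriate for the current target and
// forwards to DepthwiseConvHybridWithRounding.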
inline void DepthwiseConvHybridImpl(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, const int32_t* input_offsets,
int thread_start, int thread_end, int thread_dim) { … }
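
// cpu_backend_threadpool::Task that runs DepthwiseConvHybridImpl over one
// thread's [thread_start, thread_end) slice along `thread_dim`, so the
// convolution can be parallelized across the CPU backend thread pool.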
template <typename T, typename TS>
struct DepthwiseConvHybridWorkerTask : cpu_backend_threadpool::Task { … };
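
// Public entry point for the hybrid per-channel depthwise convolution:
// int8 input and filter data, per-batch input scales/offsets, per-channel
// filter scales, float bias and float output. Decides how many threads to
// use from the output size and the CpuBackendContext, then either runs
// DepthwiseConvHybridImpl inline or dispatches DepthwiseConvHybridWorkerTasks
// across the CPU backend thread pool.
//
// Example usage (illustrative sketch only; the shapes and data buffers below
// are hypothetical, and real callers such as the builtin DEPTHWISE_CONV_2D
// kernel derive them from the op's tensors):
//
//   DepthwiseParams params;
//   params.stride_width = 1;
//   params.stride_height = 1;
//   params.dilation_width_factor = 1;
//   params.dilation_height_factor = 1;
//   params.padding_values.width = 0;
//   params.padding_values.height = 0;
//   params.depth_multiplier = 1;
//   params.float_activation_min = std::numeric_limits<float>::lowest();
//   params.float_activation_max = std::numeric_limits<float>::max();
//
//   // NHWC shapes: 1x8x8x16 input, 1x3x3x16 filter, 1x6x6x16 output.
//   const RuntimeShape input_shape({1, 8, 8, 16});
//   const RuntimeShape filter_shape({1, 3, 3, 16});
//   const RuntimeShape bias_shape({16});
//   const RuntimeShape output_shape({1, 6, 6, 16});
//
//   // input_data holds the already-quantized int8 input; input_scales and
//   // input_offsets hold one quantization scale / zero point per batch;
//   // per_channel_scales holds one filter scale per output channel.
//   DepthwiseConvHybridPerChannel(params, input_scales, input_shape,
//                                 input_data, filter_shape, filter_data,
//                                 bias_shape, bias_data, output_shape,
//                                 output_data, per_channel_scales,
//                                 input_offsets, cpu_backend_context);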
inline void DepthwiseConvHybridPerChannel(
const DepthwiseParams& params, const float* input_scales,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scales, int32_t* input_offsets,
CpuBackendContext* cpu_backend_context) { … }
}  // namespace optimized_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_INTEGER_OPS_DEPTHWISE_CONV_HYBRID_H_