#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_SPARSE_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_SPARSE_OPS_FULLY_CONNECTED_H_
#include <algorithm>
#include <cstdint>
#include "ruy/profiler/instrumentation.h"
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_threadpool.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace optimized_ops {
// Float fully-connected entry point that takes a sparsity descriptor for the
// weights in addition to the usual shape/data pairs.
//
// Parameters (as declared in the signature):
//   sparsity       - TfLiteSparsity descriptor accompanying `weights_data`.
//   params         - FullyConnectedParams for the op.
//   input_shape /  input_data   - float input tensor.
//   weights_shape / weights_data - float weights tensor.
//   bias_shape /   bias_data    - float bias tensor.
//   output_shape / output_data  - float output tensor; the only non-const
//                                 buffer, so results are written here.
//
// NOTE(review): presumably `weights_data` holds only the non-zero values and
// `sparsity` supplies the index metadata needed to locate them - confirm
// against the implementation and the caller.
// NOTE(review): whether `bias_data` may be null and whether an activation
// clamp from `params` is applied cannot be determined from the signature -
// TODO confirm.
inline void FullyConnectedSparseWeight(
const TfLiteSparsity& sparsity, const FullyConnectedParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& weights_shape, const float* weights_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data) { … }
// Int8 implementation routine for the "1x16" sparse fully-connected variant,
// parameterised by a [thread_start, thread_end) style pair of ints.
//
// Parameters (as declared in the signature):
//   sparsity       - TfLiteSparsity descriptor accompanying `weights_data`.
//   params         - FullyConnectedParams for the op.
//   input_shape /  input_data    - int8 input tensor.
//   weights_shape / weights_data - int8 weights tensor.
//   per_channel_scale / per_channel_shift
//                  - int32 arrays; presumably per-output-channel
//                    requantization multipliers and shifts - TODO confirm,
//                    including whether they may be null.
//   bias_shape /   bias_data     - int32 bias tensor.
//   output_shape / output_data   - int8 output tensor; the only non-const
//                                  buffer, so results are written here.
//   thread_start / thread_end    - presumably the range of work (e.g. batch
//                                  rows) this call handles; whether the end is
//                                  exclusive should be verified in the body.
//   cpu_backend_context          - backend context, taken by const reference.
//
// NOTE(review): "1x16" presumably refers to the sparse block shape expected
// in `sparsity` - confirm against the implementation.
inline void FullyConnectedSparseWeight1x16Impl(
const TfLiteSparsity& sparsity, const FullyConnectedParams& params,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& weights_shape, const int8_t* weights_data,
const int32_t* per_channel_scale, const int32_t* per_channel_shift,
const RuntimeShape& bias_shape, const int32_t* bias_data,
const RuntimeShape& output_shape, int8_t* output_data, int thread_start,
int thread_end, const CpuBackendContext& cpu_backend_context) { … }
// Float implementation routine for the "1x4" sparse fully-connected variant,
// parameterised by a [thread_start, thread_end) style pair of ints.
//
// Parameters (as declared in the signature):
//   sparsity       - TfLiteSparsity descriptor accompanying `weights_data`.
//   params         - FullyConnectedParams for the op.
//   input_shape /  input_data    - float input tensor.
//   weights_shape / weights_data - float weights tensor.
//   bias_shape /   bias_data     - float bias tensor.
//   output_shape / output_data   - float output tensor; the only non-const
//                                  buffer, so results are written here.
//   thread_start / thread_end    - presumably the range of work (e.g. batch
//                                  rows) this call handles; whether the end is
//                                  exclusive should be verified in the body.
//   cpu_backend_context          - backend context, taken by const reference.
//
// NOTE(review): "1x4" presumably refers to the sparse block shape expected in
// `sparsity` - confirm against the implementation.
inline void FullyConnectedSparseWeight1x4Impl(
const TfLiteSparsity& sparsity, const FullyConnectedParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& weights_shape, const float* weights_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data, int thread_start,
int thread_end, const CpuBackendContext& cpu_backend_context) { … }
// Thread-pool task type derived from cpu_backend_threadpool::Task.
// NOTE(review): judging by the name, this presumably packages the arguments
// of FullyConnectedSparseWeight1x4Impl together with a thread range so the
// work can be dispatched through the CPU backend thread pool - confirm
// against the struct's members and its Run() override.
struct FullyConnectedSparseWeight1x4Task : cpu_backend_threadpool::Task { … };
// Int8 entry point for the "1x16" sparse fully-connected variant.
//
// Takes the same tensor arguments as FullyConnectedSparseWeight1x16Impl but
// no explicit thread range, and receives the backend context as a mutable
// pointer rather than a const reference.
//
// Parameters (as declared in the signature):
//   sparsity       - TfLiteSparsity descriptor accompanying `weights_data`.
//   params         - FullyConnectedParams for the op.
//   input_shape /  input_data    - int8 input tensor.
//   weights_shape / weights_data - int8 weights tensor.
//   per_channel_scale / per_channel_shift
//                  - int32 arrays; presumably per-output-channel
//                    requantization parameters - TODO confirm.
//   bias_shape /   bias_data     - int32 bias tensor.
//   output_shape / output_data   - int8 output tensor; the only non-const
//                                  data buffer, so results are written here.
//   cpu_backend_context          - mutable CpuBackendContext pointer.
//
// NOTE(review): presumably forwards to FullyConnectedSparseWeight1x16Impl
// over the full work range; whether it runs single- or multi-threaded should
// be verified in the body.
inline void FullyConnectedSparseWeight1x16(
const TfLiteSparsity& sparsity, const FullyConnectedParams& params,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& weights_shape, const int8_t* weights_data,
const int32_t* per_channel_scale, const int32_t* per_channel_shift,
const RuntimeShape& bias_shape, const int32_t* bias_data,
const RuntimeShape& output_shape, int8_t* output_data,
CpuBackendContext* cpu_backend_context) { … }
// Float entry point for the "1x4" sparse fully-connected variant.
//
// Takes the same tensor arguments as FullyConnectedSparseWeight1x4Impl but no
// explicit thread range, and receives the backend context as a mutable
// pointer rather than a const reference.
//
// Parameters (as declared in the signature):
//   sparsity       - TfLiteSparsity descriptor accompanying `weights_data`.
//   params         - FullyConnectedParams for the op.
//   input_shape /  input_data    - float input tensor.
//   weights_shape / weights_data - float weights tensor.
//   bias_shape /   bias_data     - float bias tensor.
//   output_shape / output_data   - float output tensor; the only non-const
//                                  data buffer, so results are written here.
//   cpu_backend_context          - mutable CpuBackendContext pointer.
//
// NOTE(review): presumably splits the work into
// FullyConnectedSparseWeight1x4Task instances and runs them on the backend
// thread pool, falling back to a direct FullyConnectedSparseWeight1x4Impl
// call when only one thread is used - confirm against the body.
inline void FullyConnectedSparseWeight1x4(
const TfLiteSparsity& sparsity, const FullyConnectedParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& weights_shape, const float* weights_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
CpuBackendContext* cpu_backend_context) { … }
}
}
#endif