#include "tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "tensorflow/lite/core/c/builtin_op_data.h"
#include "tensorflow/lite/core/c/c_api_types.h"
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/optimized/fully_connected_4bit.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/optimized/sparse_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/sparse_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
#ifdef TFLITE_HAVE_CPUINFO
#include "include/cpuinfo.h"
#endif
#if defined(__APPLE__) || defined(__linux__) || defined(__Fuchsia__)
#include <sys/mman.h>
#include <unistd.h>
#endif
namespace tflite {
namespace ops {
namespace builtin {
namespace fully_connected {
namespace {
// File-local helpers for sparse-weight support and quantization validation.
// NOTE(review): all bodies are elided in this view; comments describe intent
// inferred from signatures/names — confirm against the full file.

// Whether `sparsity` uses a dimension-metadata layout this kernel can handle.
bool SupportedSparsityFormat(const TfLiteSparsity& sparsity) { … }
// Expected dim_metadata sizes for the two supported sparse encodings
// (values elided here).
static const int kDimMetadataSizeRandomSparse = …;
static const int kDimMetadataSizeBlockSparse = …;
// Creates the "ledger" scratch tensor used by the sparse execution path;
// presumably sized from `sparsity` metadata — verify in full source.
TfLiteStatus CreateLedgerTensor(const TfLiteSparsity* sparsity,
TfLiteContext* context, TfLiteTensor* ledger) { … }
// Fills `ledger_data` from `sparsity` metadata for the sparse kernels.
TfLiteStatus PopulateLedgerData(const TfLiteSparsity* sparsity,
TfLiteContext* context, uint8_t* ledger_data) { … }
// Checks that `tensor` carries valid per-channel (affine) quantization
// parameters; returns an error status via `context` on mismatch.
TfLiteStatus VerifyPerChannelQuantization(TfLiteContext* context,
const TfLiteTensor* tensor) { … }
// Checks that `tensor`'s quantization zero point equals `expected_value`.
TfLiteStatus VerifyQuantizationZeroPoint(const TfLiteTensor* tensor,
int expected_value) { … }
}  // namespace
enum KernelType { … };
struct OpData { … };
// Indices of the op's input/output tensors and of the temporary (scratch)
// tensors used by the hybrid/shuffled paths. Values elided in this view.
constexpr int kInputTensor = …;
constexpr int kWeightsTensor = …;
constexpr int kBiasTensor = …;
constexpr int kOutputTensor = …;
constexpr int kShuffledInputWorkspaceTensor = …;
constexpr int kQuantizedInputTensor = …;
constexpr int kScalingFactorsTensor = …;
constexpr int kAccumulatorTensor = …;
constexpr int kInputOffsetsTensor = …;
// Validates that the input/filter/bias/output dtypes form a combination
// supported for the given `params` (body elided — confirm exact rules in
// the full file). Returns kTfLiteError via `context` on mismatch.
inline TfLiteStatus CheckTypes(TfLiteContext* context,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteFullyConnectedParams* params) { … }
void* Init(TfLiteContext* context, const char* buffer, size_t length) { … }
void Free(TfLiteContext* context, void* buffer) { … }
// Resizes `output` from the batch/unit dimensions; the `cols` parameter
// presumably validates the input's inner dimension against the weights.
// Body elided — confirm resize semantics in the full file.
TfLiteStatus UpdateOutputSize(TfLiteContext* context,
TfLiteFullyConnectedParams* params,
const TfLiteTensor* input, TfLiteTensor* output,
int batch_size, int num_units, int cols) { … }
// Prepare logic specific to the 4-bit weight path (see the included
// fully_connected_4bit.h); presumably sets up packed-weight scratch
// buffers for the given geometry. Body elided in this view.
TfLiteStatus PrepareImpl4Bit(TfLiteContext* context, TfLiteNode* node,
int lhs_width, int rhs_width, int depth,
int batch_size, int cols, int output_depth) { … }
// Shared Prepare logic, parameterized on the kernel variant; called by the
// templated Prepare() below. Body elided in this view.
TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node,
KernelType kernel_type) { … }
// TfLiteRegistration::prepare hook for each kernel variant — presumably
// forwards to PrepareImpl(context, node, kernel_type).
template <KernelType kernel_type>
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { … }
// Eval path for the PIE kernel variant (see Register_FULLY_CONNECTED_PIE).
// Body elided — confirm numeric behavior in the full file.
TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) { … }
// Dense hybrid eval (float activations with quantized weights, judging by
// the quantized-input/scaling-factor scratch tensors it takes). The
// scratch tensors were presumably allocated during Prepare. Body elided.
TfLiteStatus EvalHybridDense(
TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* input_quantized, TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
// Computes one thread's slice [thread_start, thread_end) of the sparse
// hybrid fully-connected; invoked via SparseHybridFullyConnectedTask
// below (presumably — bodies elided). Must be safe to run concurrently
// with other slices; confirm in the full file.
void EvalSparseHybridImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, int thread_start,
int thread_end, TfLiteTensor* input_quantized,
TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
struct SparseHybridFullyConnectedTask : cpu_backend_threadpool::Task { … };
// Dense hybrid eval specialized for 4-bit packed weights (note: no
// row_sums scratch, unlike EvalHybridDense). Body elided in this view.
TfLiteStatus EvalHybridDense4Bit(
TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* input_quantized, TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* input_offsets,
TfLiteTensor* output) { … }
// Top-level hybrid eval — presumably dispatches between the dense, 4-bit
// and sparse hybrid implementations above based on filter layout.
// Body elided; confirm dispatch conditions in the full file.
TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* input_quantized,
TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
namespace {
// Type-specialized quantized implementations, templated on the kernel
// variant so each registration (reference vs. optimized) instantiates its
// own code path. Bodies elided in this view.

// Per-tensor quantized int8 fully-connected; the CpuBackendContext
// parameter suggests it can use the optimized GEMM backend.
template <KernelType kernel_type>
void FullyConnectedInt8(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output,
CpuBackendContext* cpu_backend_context) { … }
// Per-tensor quantized int16 fully-connected (no backend context — runs
// single-threaded, presumably).
template <KernelType kernel_type>
void FullyConnectedInt16(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) { … }
// Per-channel-quantized int8 variant.
template <KernelType kernel_type>
void FullyConnectedPerChannelInt8(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output,
CpuBackendContext* cpu_backend_context) { … }
// Per-channel-quantized int16 variant.
template <KernelType kernel_type>
void FullyConnectedPerChannelInt16(const OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output) { … }
}  // namespace
// Sanity-checks that `sparsity` metadata is consistent with the weight,
// input and output shapes before running a sparse kernel. Body elided.
bool VerifySparsity(const RuntimeShape& weights_shape,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape,
const TfLiteSparsity* sparsity) { … }
// Quantized eval entry point — presumably dispatches on tensor dtypes to
// the FullyConnectedInt8/Int16 (per-tensor or per-channel) helpers above.
// Body elided; confirm dispatch in the full file.
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) { … }
// Eval path for the shuffled-weights quantized layout; uses the extra
// `shuffled_input_workspace` output tensor (index
// kShuffledInputWorkspaceTensor) as scratch. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus EvalShuffledQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params,
OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output,
TfLiteTensor* shuffled_input_workspace) { … }
// Float eval path — presumably selects reference vs. optimized (and dense
// vs. sparse) float kernels per `kernel_type`. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) { … }
// TfLiteRegistration::invoke hook — presumably dispatches to
// EvalFloat/EvalQuantized/EvalHybrid/EvalShuffledQuantized by dtype and
// params. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { … }
}
// Registrations binding Init/Free/Prepare/Eval for each kernel variant;
// bodies elided — each presumably returns a static TfLiteRegistration
// instantiated with the corresponding KernelType.
TfLiteRegistration* Register_FULLY_CONNECTED_REF() { … }
TfLiteRegistration* Register_FULLY_CONNECTED_GENERIC_OPT() { … }
TfLiteRegistration* Register_FULLY_CONNECTED_PIE() { … }
// Default registration used by the builtin op resolver.
TfLiteRegistration* Register_FULLY_CONNECTED() { … }
}
}
}