#include "tensorflow/lite/kernels/internal/optimized/integer_ops/fully_connected.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "tensorflow/lite/core/c/builtin_op_data.h"
#include "tensorflow/lite/core/c/c_api_types.h"
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/optimized/fully_connected_4bit.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/optimized/sparse_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/sparse_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
#ifdef TFLITE_HAVE_CPUINFO
#include "include/cpuinfo.h"
#endif
#if defined(__APPLE__) || defined(__linux__) || defined(__Fuchsia__)
#include <sys/mman.h>
#include <unistd.h>
#endif
namespace tflite {
namespace ops {
namespace builtin {
namespace fully_connected {
namespace {
// File-local helpers for sparse-weight support and quantization validation.
// NOTE(review): all bodies are elided in this view; comments describe intent
// inferred from signatures/names — confirm against the full file.

// Whether `sparsity` uses a dimension-metadata layout this kernel can handle.
bool SupportedSparsityFormat(const TfLiteSparsity& sparsity) { … }
// Expected dim_metadata sizes for the two supported sparse encodings
// (values elided here).
static const int kDimMetadataSizeRandomSparse = …;
static const int kDimMetadataSizeBlockSparse = …;
// Creates the "ledger" scratch tensor used by the sparse execution path;
// presumably sized from `sparsity` metadata — verify in full source.
TfLiteStatus CreateLedgerTensor(const TfLiteSparsity* sparsity,
TfLiteContext* context, TfLiteTensor* ledger) { … }
// Fills `ledger_data` from `sparsity` metadata for the sparse kernels.
TfLiteStatus PopulateLedgerData(const TfLiteSparsity* sparsity,
TfLiteContext* context, uint8_t* ledger_data) { … }
// Checks that `tensor` carries valid per-channel (affine) quantization
// parameters; returns an error status via `context` on mismatch.
TfLiteStatus VerifyPerChannelQuantization(TfLiteContext* context,
const TfLiteTensor* tensor) { … }
// Checks that `tensor`'s quantization zero point equals `expected_value`.
TfLiteStatus VerifyQuantizationZeroPoint(const TfLiteTensor* tensor,
int expected_value) { … }
}  // namespace
enum KernelType { … };
struct OpData { … };
// Indices of the op's input/output tensors and of the temporary (scratch)
// tensors used by the hybrid/shuffled paths. Values elided in this view.
constexpr int kInputTensor = …;
constexpr int kWeightsTensor = …;
constexpr int kBiasTensor = …;
constexpr int kOutputTensor = …;
constexpr int kShuffledInputWorkspaceTensor = …;
constexpr int kQuantizedInputTensor = …;
constexpr int kScalingFactorsTensor = …;
constexpr int kAccumulatorTensor = …;
constexpr int kInputOffsetsTensor = …;
// Validates that the input/filter/bias/output dtypes form a combination
// supported for the given `params` (body elided — confirm exact rules in
// the full file). Returns kTfLiteError via `context` on mismatch.
inline TfLiteStatus CheckTypes(TfLiteContext* context,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteFullyConnectedParams* params) { … }
void* Init(TfLiteContext* context, const char* buffer, size_t length) { … }
void Free(TfLiteContext* context, void* buffer) { … }
// Resizes `output` from the batch/unit dimensions; the `cols` parameter
// presumably validates the input's inner dimension against the weights.
// Body elided — confirm resize semantics in the full file.
TfLiteStatus UpdateOutputSize(TfLiteContext* context,
TfLiteFullyConnectedParams* params,
const TfLiteTensor* input, TfLiteTensor* output,
int batch_size, int num_units, int cols) { … }
// Prepare logic specific to the 4-bit weight path (see the included
// fully_connected_4bit.h); presumably sets up packed-weight scratch
// buffers for the given geometry. Body elided in this view.
TfLiteStatus PrepareImpl4Bit(TfLiteContext* context, TfLiteNode* node,
int lhs_width, int rhs_width, int depth,
int batch_size, int cols, int output_depth) { … }
// Shared Prepare logic, parameterized on the kernel variant; called by the
// templated Prepare() below. Body elided in this view.
TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node,
KernelType kernel_type) { … }
// TfLiteRegistration::prepare hook for each kernel variant — presumably
// forwards to PrepareImpl(context, node, kernel_type).
template <KernelType kernel_type>
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { … }
// Eval path for the PIE kernel variant (see Register_FULLY_CONNECTED_PIE).
// Body elided — confirm numeric behavior in the full file.
TfLiteStatus EvalPie(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) { … }
// Dense hybrid eval (float activations with quantized weights, judging by
// the quantized-input/scaling-factor scratch tensors it takes). The
// scratch tensors were presumably allocated during Prepare. Body elided.
TfLiteStatus EvalHybridDense(
TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* input_quantized, TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
// Computes one thread's slice [thread_start, thread_end) of the sparse
// hybrid fully-connected; invoked via SparseHybridFullyConnectedTask
// below (presumably — bodies elided). Must be safe to run concurrently
// with other slices; confirm in the full file.
void EvalSparseHybridImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, int thread_start,
int thread_end, TfLiteTensor* input_quantized,
TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
struct SparseHybridFullyConnectedTask : cpu_backend_threadpool::Task { … };
// Dense hybrid eval specialized for 4-bit packed weights (note: no
// row_sums scratch, unlike EvalHybridDense). Body elided in this view.
TfLiteStatus EvalHybridDense4Bit(
TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* input_quantized, TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* input_offsets,
TfLiteTensor* output) { … }
// Top-level hybrid eval — presumably dispatches between the dense, 4-bit
// and sparse hybrid implementations above based on filter layout.
// Body elided; confirm dispatch conditions in the full file.
TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* input_quantized,
TfLiteTensor* scaling_factors,
TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
TfLiteTensor* input_offsets, TfLiteTensor* output) { … }
namespace {
// Type-specialized quantized implementations, templated on the kernel
// variant so each registration (reference vs. optimized) instantiates its
// own code path. Bodies elided in this view.

// Per-tensor quantized int8 fully-connected; the CpuBackendContext
// parameter suggests it can use the optimized GEMM backend.
template <KernelType kernel_type>
void FullyConnectedInt8(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output,
CpuBackendContext* cpu_backend_context) { … }
// Per-tensor quantized int16 fully-connected (no backend context — runs
// single-threaded, presumably).
template <KernelType kernel_type>
void FullyConnectedInt16(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) { … }
// Per-channel-quantized int8 variant.
template <KernelType kernel_type>
void FullyConnectedPerChannelInt8(const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output,
CpuBackendContext* cpu_backend_context) { … }
// Per-channel-quantized int16 variant.
template <KernelType kernel_type>
void FullyConnectedPerChannelInt16(const OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output) { … }
}  // namespace
// Sanity-checks that `sparsity` metadata is consistent with the weight,
// input and output shapes before running a sparse kernel. Body elided.
bool VerifySparsity(const RuntimeShape& weights_shape,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape,
const TfLiteSparsity* sparsity) { … }
// Quantized eval entry point — presumably dispatches on tensor dtypes to
// the FullyConnectedInt8/Int16 (per-tensor or per-channel) helpers above.
// Body elided; confirm dispatch in the full file.
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) { … }
// Eval path for the shuffled-weights quantized layout; uses the extra
// `shuffled_input_workspace` output tensor (index
// kShuffledInputWorkspaceTensor) as scratch. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus EvalShuffledQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params,
OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias,
TfLiteTensor* output,
TfLiteTensor* shuffled_input_workspace) { … }
// Float eval path — presumably selects reference vs. optimized (and dense
// vs. sparse) float kernels per `kernel_type`. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFullyConnectedParams* params, OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) { … }
// TfLiteRegistration::invoke hook — presumably dispatches to
// EvalFloat/EvalQuantized/EvalHybrid/EvalShuffledQuantized by dtype and
// params. Body elided in this view.
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { … }
}
// Registrations binding Init/Free/Prepare/Eval for each kernel variant;
// bodies elided — each presumably returns a static TfLiteRegistration
// instantiated with the corresponding KernelType.
TfLiteRegistration* Register_FULLY_CONNECTED_REF() { … }
TfLiteRegistration* Register_FULLY_CONNECTED_GENERIC_OPT() { … }
TfLiteRegistration* Register_FULLY_CONNECTED_PIE() { … }
// Default registration used by the builtin op resolver.
TfLiteRegistration* Register_FULLY_CONNECTED() { … }
}
}
}