detection_postprocess.cc | Explore in Territory

/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <math.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <initializer_list>
#include <numeric>
#include <vector>

#include "flatbuffers/flexbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace custom {
namespace detection_postprocess {

// Input tensors
constexpr int kInputTensorBoxEncodings = …;
constexpr int kInputTensorClassPredictions = …;
constexpr int kInputTensorAnchors = …;

// Output tensors
// When max_classes_per_detection > 1, detection boxes will be replicated by the
// number of detected classes of that box. Dummy data will be appended if the
// number of classes is smaller than max_classes_per_detection.
constexpr int kOutputTensorDetectionBoxes = …;
constexpr int kOutputTensorDetectionClasses = …;
constexpr int kOutputTensorDetectionScores = …;
constexpr int kOutputTensorNumDetections = …;

constexpr int kNumCoordBox = …;
constexpr int kBatchSize = …;

constexpr int kNumDetectionsPerClass = …;

// The object detection model produces axis-aligned boxes in two formats:
// BoxCorner represents the upper left corner (xmin, ymin) and
// the lower right corner (xmax, ymax).
// CenterSize represents the center (xcenter, ycenter), height and width.
// BoxCornerEncoding and CenterSizeEncoding are related as follows:
// ycenter = y / y_scale * anchor.h + anchor.y;
// xcenter = x / x_scale * anchor.w + anchor.x;
// half_h = 0.5 * exp(h / h_scale) * anchor.h;
// half_w = 0.5 * exp(w / w_scale) * anchor.w;
// ymin = ycenter - half_h
// ymax = ycenter + half_h
// xmin = xcenter - half_w
// xmax = xcenter + half_w
struct BoxCornerEncoding { … };

struct CenterSizeEncoding { … };
// We make sure that the memory allocations are contiguous with static assert.
static_assert …;
static_assert …;

struct OpData { … };

void* Init(TfLiteContext* context, const char* buffer, size_t length) { … }

void Free(TfLiteContext* context, void* buffer) { … }

TfLiteStatus SetTensorSizes(TfLiteContext* context, TfLiteTensor* tensor,
                            std::initializer_list<int> values) { … }

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { … }

class Dequantizer { … };

void DequantizeBoxEncodings(const TfLiteTensor* input_box_encodings, int idx,
                            float quant_zero_point, float quant_scale,
                            int length_box_encoding,
                            CenterSizeEncoding* box_centersize) { … }

template <class T>
T ReInterpretTensor(const TfLiteTensor* tensor) { … }

template <class T>
T ReInterpretTensor(TfLiteTensor* tensor) { … }

TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
                                   OpData* op_data) { … }

void DecreasingPartialArgSort(const float* values, int num_values,
                              int num_to_sort, int* indices) { … }

void DecreasingArgSort(const float* values, int num_values, int* indices) { … }

void SelectDetectionsAboveScoreThreshold(const std::vector<float>& values,
                                         const float threshold,
                                         std::vector<float>* keep_values,
                                         std::vector<int>* keep_indices) { … }

bool ValidateBoxes(const TfLiteTensor* decoded_boxes, const int num_boxes) { … }

float ComputeIntersectionOverUnion(const TfLiteTensor* decoded_boxes,
                                   const int i, const int j) { … }

// NonMaxSuppressionSingleClassHelper() prunes out the box locations with high
// overlap before selecting the highest scoring boxes (max_detections in
// number). It assumes all boxes are good in the beginning and sorts based on
// the scores. If a lower-scoring box has too much overlap with a
// higher-scoring box, we get rid of the lower-scoring box.
// Complexity is O(N^2): pairwise comparison between boxes.
TfLiteStatus NonMaxSuppressionSingleClassHelper(
    TfLiteContext* context, TfLiteNode* node, OpData* op_data,
    const std::vector<float>& scores, int max_detections,
    std::vector<int>* selected) { … }

struct BoxInfo { … };

struct NMSTaskParam { … };

void InplaceMergeBoxInfo(std::vector<BoxInfo>& boxes, int mid_index,
                         int end_index) { … }

TfLiteStatus ComputeNMSResult(const NMSTaskParam& nms_task_param, int col_begin,
                              int col_end, int& sorted_indices_size,
                              std::vector<BoxInfo>& resulted_sorted_box_info) { … }

struct NonMaxSuppressionWorkerTask : cpu_backend_threadpool::Task { … };

// This function implements a regular version of Non Maximal Suppression (NMS)
// for multiple classes where
// 1) we do NMS separately for each class across all anchors and
// 2) keep only the highest anchor scores across all classes
// 3) The worst-case runtime of the regular NMS is O(K*N^2),
// where N is the number of anchors and K the number of
// classes.
TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context,
                                                      TfLiteNode* node,
                                                      OpData* op_data,
                                                      const float* scores) { … }

// This function implements a fast version of Non Maximal Suppression for
// multiple classes where
// 1) we keep the top-k scores for each anchor and
// 2) during NMS, each anchor only uses the highest class score for sorting.
// 3) Compared to standard NMS, the worst-case runtime of this version is
// O(N^2) instead of O(K*N^2), where N is the number of anchors and K the
// number of classes.
TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context,
                                                   TfLiteNode* node,
                                                   OpData* op_data,
                                                   const float* scores) { … }

void DequantizeClassPredictions(const TfLiteTensor* input_class_predictions,
                                const int num_boxes,
                                const int num_classes_with_background,
                                TfLiteTensor* scores) { … }

TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
                                         TfLiteNode* node, OpData* op_data) { … }

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { … }
}  // namespace detection_postprocess

TfLiteRegistration* Register_DETECTION_POSTPROCESS() { … }

// Since the op is named "TFLite_Detection_PostProcess", the selective build
// tool will assume the register function is named
// "Register_TFLITE_DETECTION_POST_PROCESS".
TfLiteRegistration* Register_TFLITE_DETECTION_POST_PROCESS() { … }

}  // namespace custom
}  // namespace ops
}  // namespace tflite
chromium/third_party/tflite/src/tensorflow/lite/kernels/detection_postprocess.cc