// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <cstdlib>
#include <memory>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/synchronization/blocking_counter.h"
#include "mediapipe/calculators/image/feature_detector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_features2d_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/threadpool.h"
#include "mediapipe/framework/tool/options_util.h"
#include "tensorflow/lite/interpreter.h"

namespace mediapipe {

constexpr char kOptionsTag[] = "OPTIONS";
// Side length, in pixels, of the square patch extracted around each keypoint.
constexpr int kPatchSize = 32;
// Number of worker threads used to extract patches in parallel.
constexpr int kNumThreads = 16;

// A calculator to apply local feature detection.
//
// Input stream:
//   IMAGE: Input image frame of type ImageFrame from the video stream.
// Output streams:
//   FEATURES: The keypoints detected in the input image as
//     std::vector<cv::KeyPoint>.
//   LANDMARKS: Optional keypoint locations, normalized by the image
//     dimensions, as NormalizedLandmarkList.
//   PATCHES: Optional patches extracted around each keypoint, packed into a
//     single float tensor, as std::vector<TfLiteTensor>.
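//
// A minimal example graph node (stream names are illustrative):
// node {
//   calculator: "FeatureDetectorCalculator"
//   input_stream: "IMAGE:input_video"
//   output_stream: "FEATURES:features"
//   output_stream: "LANDMARKS:landmarks"
//   output_stream: "PATCHES:patches"
// }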
class FeatureDetectorCalculator : public CalculatorBase {
public:
~FeatureDetectorCalculator() override = default;
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
FeatureDetectorCalculatorOptions options_;
cv::Ptr<cv::Feature2D> feature_detector_;
std::unique_ptr<mediapipe::ThreadPool> pool_;
// Create image pyramid based on input image.
void ComputeImagePyramid(const cv::Mat& input_image,
std::vector<cv::Mat>* image_pyramid);
// Extract the patch for single feature with image pyramid.
cv::Mat ExtractPatch(const cv::KeyPoint& feature,
const std::vector<cv::Mat>& image_pyramid);
};

REGISTER_CALCULATOR(FeatureDetectorCalculator);

absl::Status FeatureDetectorCalculator::GetContract(CalculatorContract* cc) {
  // Process() reads IMAGE unconditionally, so require it in the contract.
  RET_CHECK(cc->Inputs().HasTag("IMAGE"));
  cc->Inputs().Tag("IMAGE").Set<ImageFrame>();
if (cc->Outputs().HasTag("FEATURES")) {
cc->Outputs().Tag("FEATURES").Set<std::vector<cv::KeyPoint>>();
}
if (cc->Outputs().HasTag("LANDMARKS")) {
cc->Outputs().Tag("LANDMARKS").Set<NormalizedLandmarkList>();
}
if (cc->Outputs().HasTag("PATCHES")) {
cc->Outputs().Tag("PATCHES").Set<std::vector<TfLiteTensor>>();
}
return absl::OkStatus();
}

absl::Status FeatureDetectorCalculator::Open(CalculatorContext* cc) {
options_ =
tool::RetrieveOptions(cc->Options(), cc->InputSidePackets(), kOptionsTag)
.GetExtension(FeatureDetectorCalculatorOptions::ext);
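  // ORB arguments: nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel,
  // WTA_K, and scoreType. The edge threshold is tied to the patch size so
  // keypoints too close to the border for patch extraction are discarded.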
feature_detector_ = cv::ORB::create(
options_.max_features(), options_.scale_factor(),
options_.pyramid_level(), kPatchSize - 1, 0, 2, cv::ORB::FAST_SCORE);
pool_ = absl::make_unique<mediapipe::ThreadPool>("ThreadPool", kNumThreads);
pool_->StartWorkers();
return absl::OkStatus();
}

absl::Status FeatureDetectorCalculator::Process(CalculatorContext* cc) {
const Timestamp& timestamp = cc->InputTimestamp();
if (timestamp == Timestamp::PreStream()) {
// Indicator packet.
return absl::OkStatus();
}
  InputStream* input_frame = &(cc->Inputs().Tag("IMAGE"));
  if (input_frame->IsEmpty()) return absl::OkStatus();
  cv::Mat input_view = formats::MatView(&input_frame->Get<ImageFrame>());
  // The detector expects a three-channel RGB frame.
  RET_CHECK_EQ(input_view.channels(), 3);
  cv::Mat grayscale_view;
  cv::cvtColor(input_view, grayscale_view, cv::COLOR_RGB2GRAY);
std::vector<cv::KeyPoint> keypoints;
feature_detector_->detect(grayscale_view, keypoints);
  // Keep at most max_features keypoints.
  if (keypoints.size() > static_cast<size_t>(options_.max_features())) {
    keypoints.resize(options_.max_features());
  }
if (cc->Outputs().HasTag("FEATURES")) {
auto features_ptr = absl::make_unique<std::vector<cv::KeyPoint>>(keypoints);
cc->Outputs().Tag("FEATURES").Add(features_ptr.release(), timestamp);
}
if (cc->Outputs().HasTag("LANDMARKS")) {
auto landmarks_ptr = absl::make_unique<NormalizedLandmarkList>();
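    // Convert pixel coordinates to normalized [0, 1] coordinates.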
for (int j = 0; j < keypoints.size(); ++j) {
auto feature_landmark = landmarks_ptr->add_landmark();
feature_landmark->set_x(keypoints[j].pt.x / grayscale_view.cols);
feature_landmark->set_y(keypoints[j].pt.y / grayscale_view.rows);
}
cc->Outputs().Tag("LANDMARKS").Add(landmarks_ptr.release(), timestamp);
}
if (cc->Outputs().HasTag("PATCHES")) {
std::vector<cv::Mat> image_pyramid;
ComputeImagePyramid(grayscale_view, &image_pyramid);
std::vector<cv::Mat> patch_mat;
patch_mat.resize(keypoints.size());
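    // Extract a patch per keypoint in parallel; the blocking counter waits
    // for all scheduled tasks to complete.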
absl::BlockingCounter counter(keypoints.size());
for (int i = 0; i < keypoints.size(); i++) {
pool_->Schedule(
[this, &image_pyramid, &keypoints, &patch_mat, i, &counter] {
patch_mat[i] = ExtractPatch(keypoints[i], image_pyramid);
counter.DecrementCount();
});
}
counter.Wait();
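    // Pack all patches into one float32 tensor of shape
    // [batch_size, kPatchSize, kPatchSize, 1].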
    const int batch_size = options_.max_features();
    auto patches = absl::make_unique<std::vector<TfLiteTensor>>();
    TfLiteTensor tensor;
    tensor.type = kTfLiteFloat32;
    tensor.dims = TfLiteIntArrayCreate(4);
    tensor.dims->data[0] = batch_size;
    tensor.dims->data[1] = kPatchSize;
    tensor.dims->data[2] = kPatchSize;
    tensor.dims->data[3] = 1;
    const int num_bytes = batch_size * kPatchSize * kPatchSize * sizeof(float);
    tensor.data.data = malloc(num_bytes);
    tensor.bytes = num_bytes;
    // The buffer is heap-allocated and owned by this tensor rather than by an
    // interpreter arena, so mark it dynamic.
    tensor.allocation_type = kTfLiteDynamic;
    float* tensor_buffer = tensor.data.f;
    // Copy each patch into the batch, normalizing 8-bit pixel values from
    // [0, 255] to [-1, 1).
    for (int i = 0; i < keypoints.size(); i++) {
      for (int j = 0; j < patch_mat[i].rows; ++j) {
        for (int k = 0; k < patch_mat[i].cols; ++k) {
          *tensor_buffer++ = patch_mat[i].at<uchar>(j, k) / 128.0f - 1.0f;
        }
      }
    }
    // Zero-fill any unused slots when fewer than batch_size keypoints were
    // detected.
    const int num_floats = num_bytes / sizeof(float);
    for (int i = keypoints.size() * kPatchSize * kPatchSize; i < num_floats;
         ++i) {
      *tensor_buffer++ = 0.0f;
    }
patches->emplace_back(tensor);
cc->Outputs().Tag("PATCHES").Add(patches.release(), timestamp);
}
return absl::OkStatus();
}

void FeatureDetectorCalculator::ComputeImagePyramid(
    const cv::Mat& input_image, std::vector<cv::Mat>* image_pyramid) {
  // Level 0 is the full-resolution input; each subsequent level is downscaled
  // by the configured scale factor.
  cv::Mat src_image = input_image;
  for (int i = 0; i < options_.pyramid_level(); ++i) {
    image_pyramid->push_back(src_image);
    cv::Mat scaled_image;
    cv::resize(src_image, scaled_image, cv::Size(),
               1.0f / options_.scale_factor(), 1.0f / options_.scale_factor());
    src_image = scaled_image;
  }
}

cv::Mat FeatureDetectorCalculator::ExtractPatch(
    const cv::KeyPoint& feature, const std::vector<cv::Mat>& image_pyramid) {
  const cv::Mat& img = image_pyramid[feature.octave];
  // Map the keypoint position into the coordinate frame of its pyramid level.
  const float scale_factor =
      1.0f / std::pow(options_.scale_factor(), feature.octave);
  const cv::Point2f center(feature.pt.x * scale_factor,
                           feature.pt.y * scale_factor);
  // Rotate about the keypoint by its orientation, then translate so that the
  // keypoint lands at the center of the patch.
  cv::Mat rot = cv::getRotationMatrix2D(center, feature.angle, 1.0);
  rot.at<double>(0, 2) += kPatchSize / 2 - center.x;
  rot.at<double>(1, 2) += kPatchSize / 2 - center.y;
  // Perform the affine transformation to extract the oriented patch.
  cv::Mat cropped_img;
  cv::warpAffine(img, cropped_img, rot, cv::Size(kPatchSize, kPatchSize),
                 cv::INTER_LINEAR);
  return cropped_img;
}

}  // namespace mediapipe