chromium/third_party/mediapipe/src/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.cc

// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
#include "mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.pb.h"

namespace mediapipe {

// Row-major 3x3 float matrix; used below to view an annotation's flat
// rotation array as a matrix.
using Matrix3fRM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
using Eigen::Vector2f;
using Eigen::Vector3f;

namespace {

// Tag of the input stream carrying FrameAnnotation packets.
constexpr char kInputFrameAnnotationTag[] = "FRAME_ANNOTATION";
// Tag of the output stream carrying std::vector<NormalizedRect> packets.
constexpr char kOutputNormRectsTag[] = "NORM_RECTS";

using ::mediapipe::NormalizedRect;

}  // namespace

// A calculator that converts a FrameAnnotation proto into a vector of
// NormalizedRect, one rect per object annotation.
//
// Each rect is the axis-aligned bounding box of the annotation's 2D keypoints.
// The rotation angle of the NormalizedRect is derived from the object's 3D
// pose. In side view (TOP_VIEW_OFF) the angle is calculated such that after
// rotation the 2D projection of the object's y-axis on the image plane is
// vertical; in top view (TOP_VIEW_ON) the object's z-axis is used instead.
// Switching between the two views uses two thresholds (hysteresis) taken from
// FrameAnnotationToRectCalculatorOptions: `off_threshold` and `on_threshold`,
// with off_threshold <= on_threshold enforced in Open().
//
// Input:
//   FRAME_ANNOTATION - FrameAnnotation.
// Output:
//   NORM_RECTS - std::vector<NormalizedRect>.
class FrameAnnotationToRectCalculator : public CalculatorBase {
 public:
  // Which object axis is currently used to derive the rect rotation.
  enum ViewStatus {
    TOP_VIEW_ON,
    TOP_VIEW_OFF,
  };

  static absl::Status GetContract(CalculatorContract* cc);
  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;

 private:
  // Appends to `rects` the bounding rect of `annotation`'s 2D keypoints,
  // rotated by the angle returned from RotationAngleFromAnnotation().
  void AddAnnotationToRect(const ObjectAnnotation& annotation,
                           std::vector<NormalizedRect>* rects);

  // Returns the rotation angle for `annotation` and updates `status_`.
  float RotationAngleFromAnnotation(const ObjectAnnotation& annotation);

  // Returns the rotation angle computed from the two points
  // `rotation * vec + translation` and `-rotation * vec + translation`.
  float RotationAngleFromPose(const Matrix3fRM& rotation,
                              const Vector3f& translation, const Vector3f& vec);

  // Members are default-initialized so they hold defined values even before
  // Open() assigns them from the calculator options.
  ViewStatus status_ = TOP_VIEW_OFF;
  float off_threshold_ = 0.0f;
  float on_threshold_ = 0.0f;
};
REGISTER_CALCULATOR(FrameAnnotationToRectCalculator);

// Declares the packet types of the tagged streams: FRAME_ANNOTATION carries a
// FrameAnnotation and NORM_RECTS carries a std::vector<NormalizedRect>.
// Fails if the calculator has no tagged inputs or no tagged outputs.
absl::Status FrameAnnotationToRectCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  auto& input_streams = cc->Inputs();
  if (input_streams.HasTag(kInputFrameAnnotationTag)) {
    input_streams.Tag(kInputFrameAnnotationTag).Set<FrameAnnotation>();
  }

  auto& output_streams = cc->Outputs();
  if (output_streams.HasTag(kOutputNormRectsTag)) {
    output_streams.Tag(kOutputNormRectsTag)
        .Set<std::vector<NormalizedRect>>();
  }

  return absl::OkStatus();
}

// Sets the timestamp offset to zero, loads the view-switch thresholds from the
// calculator options and starts in side view (TOP_VIEW_OFF).
// Fails if off_threshold is greater than on_threshold.
absl::Status FrameAnnotationToRectCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  const FrameAnnotationToRectCalculatorOptions& calculator_options =
      cc->Options<FrameAnnotationToRectCalculatorOptions>();
  on_threshold_ = calculator_options.on_threshold();
  off_threshold_ = calculator_options.off_threshold();
  status_ = TOP_VIEW_OFF;

  RET_CHECK(off_threshold_ <= on_threshold_);
  return absl::OkStatus();
}

// Converts every object annotation of the incoming FrameAnnotation into a
// NormalizedRect and emits the resulting vector on NORM_RECTS at the input
// timestamp. Emits nothing when the input stream has no packet.
absl::Status FrameAnnotationToRectCalculator::Process(CalculatorContext* cc) {
  auto& annotation_stream = cc->Inputs().Tag(kInputFrameAnnotationTag);
  if (annotation_stream.IsEmpty()) {
    return absl::OkStatus();
  }

  const FrameAnnotation& frame = annotation_stream.Get<FrameAnnotation>();
  auto rects = absl::make_unique<std::vector<NormalizedRect>>();
  for (const ObjectAnnotation& object : frame.annotations()) {
    AddAnnotationToRect(object, rects.get());
  }

  // Ownership of the vector is handed over to the output stream.
  auto& rects_stream = cc->Outputs().Tag(kOutputNormRectsTag);
  rects_stream.Add(rects.release(), cc->InputTimestamp());
  return absl::OkStatus();
}

void FrameAnnotationToRectCalculator::AddAnnotationToRect(
    const ObjectAnnotation& annotation, std::vector<NormalizedRect>* rects) {
  float x_min = std::numeric_limits<float>::max();
  float x_max = std::numeric_limits<float>::min();
  float y_min = std::numeric_limits<float>::max();
  float y_max = std::numeric_limits<float>::min();
  for (const auto& keypoint : annotation.keypoints()) {
    const auto& point_2d = keypoint.point_2d();
    x_min = std::min(x_min, point_2d.x());
    x_max = std::max(x_max, point_2d.x());
    y_min = std::min(y_min, point_2d.y());
    y_max = std::max(y_max, point_2d.y());
  }
  NormalizedRect new_rect;
  new_rect.set_x_center((x_min + x_max) / 2);
  new_rect.set_y_center((y_min + y_max) / 2);
  new_rect.set_width(x_max - x_min);
  new_rect.set_height(y_max - y_min);
  new_rect.set_rotation(RotationAngleFromAnnotation(annotation));
  rects->push_back(new_rect);
}

float FrameAnnotationToRectCalculator::RotationAngleFromAnnotation(
    const ObjectAnnotation& annotation) {
  // Get box rotation and translation from annotation.
  const auto box_rotation =
      Eigen::Map<const Matrix3fRM>(annotation.rotation().data());
  const auto box_translation =
      Eigen::Map<const Vector3f>(annotation.translation().data());

  // Rotation angle to use when top-view is on(top-view on),
  // Which will make z-axis upright after the rotation.
  const float angle_on =
      RotationAngleFromPose(box_rotation, box_translation, Vector3f::UnitZ());
  // Rotation angle to use when side-view is on(top-view off),
  // Which will make y-axis upright after the rotation.
  const float angle_off =
      RotationAngleFromPose(box_rotation, box_translation, Vector3f::UnitY());

  // Calculate angle between z-axis and viewing ray in degrees.
  const float view_to_z_angle = std::acos(box_rotation(2, 1)) * 180 / M_PI;

  // Determine threshold based on current status,
  // on_threshold_ is used for TOP_VIEW_ON -> TOP_VIEW_OFF transition,
  // off_threshold_ is used for TOP_VIEW_OFF -> TOP_VIEW_ON transition.
  const float thresh =
      (status_ == TOP_VIEW_ON) ? on_threshold_ : off_threshold_;

  // If view_to_z_angle is smaller than threshold, then top-view is on;
  // Otherwise top-view is off.
  status_ = (view_to_z_angle < thresh) ? TOP_VIEW_ON : TOP_VIEW_OFF;

  // Determine which angle to used based on current status_.
  float angle_to_rotate = (status_ == TOP_VIEW_ON) ? angle_on : angle_off;
  return angle_to_rotate;
}

float FrameAnnotationToRectCalculator::RotationAngleFromPose(
    const Matrix3fRM& rotation, const Vector3f& translation,
    const Vector3f& vec) {
  auto p1 = rotation * vec + translation;
  auto p2 = -rotation * vec + translation;
  const float dy = p2[2] * p2[1] - p1[2] * p1[1];
  const float dx = p2[2] * p2[0] - p1[2] * p1[0];
  return M_PI / 2 - std::atan2(dy, dx);
}

}  // namespace mediapipe