chromium/third_party/mediapipe/src/mediapipe/modules/objectron/calculators/box.cc

// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/modules/objectron/calculators/box.h"

#include "Eigen/Core"
#include "absl/log/absl_check.h"
#include "mediapipe/framework/port/logging.h"

namespace mediapipe {

namespace {
constexpr int kFrontFaceId = 4;
constexpr int kTopFaceId = 2;
constexpr int kNumKeypoints = 8 + 1;
constexpr int kNumberOfAxis = 3;
constexpr int kEdgesPerAxis = 4;

}  // namespace

Box::Box(const std::string& category)
    : Model(kBoundingBox, kNumKeypoints, category),
      bounding_box_(kNumKeypoints) {
  transformation_.setIdentity();

  scale_ << 0.1, 0.1, 0.1;

  // The vertices are ordered according to the left-hand rule, so the normal
  // vector of each face will point inward the box.
  faces_.push_back({5, 6, 8, 7});  // +x on yz plane
  faces_.push_back({1, 3, 4, 2});  // -x on yz plane

  faces_.push_back({3, 7, 8, 4});  // +y on xz plane = top
  faces_.push_back({1, 2, 6, 5});  // -y on xz plane

  faces_.push_back({2, 4, 8, 6});  // +z on xy plane = front
  faces_.push_back({1, 5, 7, 3});  // -z on xy plane

  // Add the edges in the cube, they are sorted according to axis (x-y-z).
  edges_.push_back({1, 5});
  edges_.push_back({2, 6});
  edges_.push_back({3, 7});
  edges_.push_back({4, 8});

  edges_.push_back({1, 3});
  edges_.push_back({5, 7});
  edges_.push_back({2, 4});
  edges_.push_back({6, 8});

  edges_.push_back({1, 2});
  edges_.push_back({3, 4});
  edges_.push_back({5, 6});
  edges_.push_back({7, 8});
  Update();
}

void Box::Update() {
  // Compute the eight vertices of the bounding box from Box's parameters
  auto w = scale_[0] / 2.f;
  auto h = scale_[1] / 2.f;
  auto d = scale_[2] / 2.f;

  // Define the local coordinate system, w.r.t. the center of the boxs
  bounding_box_[0] << 0., 0., 0.;
  bounding_box_[1] << -w, -h, -d;
  bounding_box_[2] << -w, -h, +d;
  bounding_box_[3] << -w, +h, -d;
  bounding_box_[4] << -w, +h, +d;
  bounding_box_[5] << +w, -h, -d;
  bounding_box_[6] << +w, -h, +d;
  bounding_box_[7] << +w, +h, -d;
  bounding_box_[8] << +w, +h, +d;

  // Convert to world coordinate system
  for (int i = 0; i < kNumKeypoints; ++i) {
    bounding_box_[i] =
        transformation_.topLeftCorner<3, 3>() * bounding_box_[i] +
        transformation_.col(3).head<3>();
  }
}

void Box::Adjust(const std::vector<float>& variables) {
  Eigen::Vector3f translation;
  translation << variables[0], variables[1], variables[2];
  SetTranslation(translation);

  const float roll = variables[3];
  const float pitch = variables[4];
  const float yaw = variables[5];
  SetRotation(roll, pitch, yaw);

  Eigen::Vector3f scale;
  scale << variables[6], variables[7], variables[8];

  SetScale(scale);
  Update();
}

float* Box::GetVertex(size_t vertex_id) {
  ABSL_CHECK_LT(vertex_id, kNumKeypoints);
  return bounding_box_[vertex_id].data();
}

const float* Box::GetVertex(size_t vertex_id) const {
  ABSL_CHECK_LT(vertex_id, kNumKeypoints);
  return bounding_box_[vertex_id].data();
}

bool Box::InsideTest(const Eigen::Vector3f& point, int check_axis) const {
  const float* v0 = GetVertex(1);
  const float* v1 = GetVertex(2);
  const float* v2 = GetVertex(3);
  const float* v4 = GetVertex(5);

  switch (check_axis) {
    case 1:
      return (v0[0] <= point[0] && point[0] <= v1[0]);  // X-axis
    case 2:
      return (v0[1] <= point[1] && point[1] <= v2[1]);  // Y-axis
    case 3:
      return (v0[2] <= point[2] && point[2] <= v4[2]);  // Z-axis
    default:
      return false;
  }
}

void Box::Deserialize(const Object& obj) {
  ABSL_CHECK_EQ(obj.keypoints_size(), kNumKeypoints);
  Model::Deserialize(obj);
}

void Box::Serialize(Object* obj) {
  Model::Serialize(obj);
  obj->set_type(Object::BOUNDING_BOX);
  std::vector<Vector3f> local_bounding_box(9);
  // Define the local coordinate system, w.r.t. the center of the boxs
  local_bounding_box[0] << 0., 0., 0.;
  local_bounding_box[1] << -0.5, -0.5, -0.5;
  local_bounding_box[2] << -0.5, -0.5, +0.5;
  local_bounding_box[3] << -0.5, +0.5, -0.5;
  local_bounding_box[4] << -0.5, +0.5, +0.5;
  local_bounding_box[5] << +0.5, -0.5, -0.5;
  local_bounding_box[6] << +0.5, -0.5, +0.5;
  local_bounding_box[7] << +0.5, +0.5, -0.5;
  local_bounding_box[8] << +0.5, +0.5, +0.5;
  for (int i = 0; i < kNumKeypoints; ++i) {
    KeyPoint* keypoint = obj->add_keypoints();
    keypoint->set_x(local_bounding_box[i][0]);
    keypoint->set_y(local_bounding_box[i][1]);
    keypoint->set_z(local_bounding_box[i][2]);
    keypoint->set_confidence_radius(0.);
  }
}

const Face& Box::GetFrontFace() const { return faces_[kFrontFaceId]; }

const Face& Box::GetTopFace() const { return faces_[kTopFaceId]; }

std::pair<Vector3f, Vector3f> Box::GetGroundPlane() const {
  const Vector3f gravity = Vector3f(0., 1., 0.);
  int ground_plane_id = 0;
  float ground_plane_error = 10.0;

  auto get_face_center = [&](const Face& face) {
    Vector3f center = Vector3f::Zero();
    for (const int vertex_id : face) {
      center += Map<const Vector3f>(GetVertex(vertex_id));
    }
    center /= face.size();
    return center;
  };

  auto get_face_normal = [&](const Face& face, const Vector3f& center) {
    Vector3f v1 = Map<const Vector3f>(GetVertex(face[0])) - center;
    Vector3f v2 = Map<const Vector3f>(GetVertex(face[1])) - center;
    Vector3f normal = v1.cross(v2);
    return normal;
  };

  // The ground plane is defined as a plane aligned with gravity.
  // gravity is the (0, 1, 0) vector in the world coordinate system.
  const auto& faces = GetFaces();
  for (int face_id = 0; face_id < faces.size(); face_id += 2) {
    const auto& face = faces[face_id];
    Vector3f center = get_face_center(face);
    Vector3f normal = get_face_normal(face, center);
    Vector3f w = gravity.cross(normal);
    const float w_sq_norm = w.squaredNorm();
    if (w_sq_norm < ground_plane_error) {
      ground_plane_error = w_sq_norm;
      ground_plane_id = face_id;
    }
  }

  Vector3f center = get_face_center(faces[ground_plane_id]);
  Vector3f normal = get_face_normal(faces[ground_plane_id], center);

  // For each face, we also have a parallel face that it's normal is also
  // aligned with gravity vector. We pick the face with lower height (y-value).
  // The parallel to face 0 is 1, face 2 is 3, and face 4 is 5.
  int parallel_face_id = ground_plane_id + 1;
  const auto& parallel_face = faces[parallel_face_id];
  Vector3f parallel_face_center = get_face_center(parallel_face);
  Vector3f parallel_face_normal =
      get_face_normal(parallel_face, parallel_face_center);
  if (parallel_face_center[1] < center[1]) {
    center = parallel_face_center;
    normal = parallel_face_normal;
  }
  return {center, normal};
}

template <typename T>
void Box::Fit(const std::vector<T>& vertices) {
  ABSL_CHECK_EQ(vertices.size(), kNumKeypoints);
  scale_.setZero();
  // The scale would remain invariant under rotation and translation.
  // We can safely estimate the scale from the oriented box.
  for (int axis = 0; axis < kNumberOfAxis; ++axis) {
    for (int edge_id = 0; edge_id < kEdgesPerAxis; ++edge_id) {
      // The edges are stored in quadruples according to each axis
      const std::array<int, 2>& edge = edges_[axis * kEdgesPerAxis + edge_id];
      scale_[axis] += (vertices[edge[0]] - vertices[edge[1]]).norm();
    }
    scale_[axis] /= kEdgesPerAxis;
  }
  // Create a scaled axis-aligned box
  transformation_.setIdentity();
  Update();

  using MatrixN3_RM = Eigen::Matrix<float, kNumKeypoints, 3, Eigen::RowMajor>;
  Eigen::Map<const MatrixN3_RM> v(vertices[0].data());
  Eigen::Map<const MatrixN3_RM> system(bounding_box_[0].data());
  auto system_h = system.rowwise().homogeneous().eval();
  auto system_g = system_h.colPivHouseholderQr();
  auto solution = system_g.solve(v).eval();
  transformation_.topLeftCorner<3, 4>() = solution.transpose();
  Update();
}

template void Box::Fit<Vector3f>(const std::vector<Vector3f>&);
template void Box::Fit<Map<Vector3f>>(const std::vector<Map<Vector3f>>&);
template void Box::Fit<Map<const Vector3f>>(
    const std::vector<Map<const Vector3f>>&);
}  // namespace mediapipe