chromium/third_party/mediapipe/src/mediapipe/calculators/image/image_cropping_calculator.cc

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/image_cropping_calculator.h"

#include <cmath>

#include "absl/log/absl_log.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/shader_util.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace {
// Vertex attribute slots used when linking the shader program and when
// issuing the draw call. NUM_ATTRIBUTES is the number of slots.
enum {
  ATTRIB_VERTEX = 0,
  ATTRIB_TEXTURE_POSITION = 1,
  NUM_ATTRIBUTES = 2,
};
}  // namespace

namespace mediapipe {

namespace {
// Message types carried by the RECT and NORM_RECT input streams.
using ::mediapipe::NormalizedRect;
using ::mediapipe::Rect;

// Image stream tags (CPU and GPU variants).
constexpr char kImageTag[] = "IMAGE";
constexpr char kImageGpuTag[] = "IMAGE_GPU";

// Crop rectangle stream tags.
constexpr char kRectTag[] = "RECT";
constexpr char kNormRectTag[] = "NORM_RECT";

// Integer crop size stream tags.
constexpr char kWidthTag[] = "WIDTH";
constexpr char kHeightTag[] = "HEIGHT";

}  // namespace

// Hands ImageCroppingCalculator to the framework's REGISTER_CALCULATOR macro
// (defined outside this file).
REGISTER_CALCULATOR(ImageCroppingCalculator);

// Declares the packet type of every stream this calculator touches and rejects
// configurations that do not name exactly one crop-size source.
//
// Returns OkStatus() on success; a RET_CHECK failure status when the image
// tags are inconsistent or the crop-size sources are ambiguous/missing.
absl::Status ImageCroppingCalculator::GetContract(CalculatorContract* cc) {
  // The image has to arrive on exactly one of IMAGE / IMAGE_GPU and has to
  // leave on exactly one of them as well.
  RET_CHECK(cc->Inputs().HasTag(kImageTag) ^ cc->Inputs().HasTag(kImageGpuTag));
  RET_CHECK(cc->Outputs().HasTag(kImageTag) ^
            cc->Outputs().HasTag(kImageGpuTag));

  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();

  // CPU path: ImageFrame in, ImageFrame out.
  if (inputs.HasTag(kImageTag)) {
    RET_CHECK(cc->Outputs().HasTag(kImageTag));
    inputs.Tag(kImageTag).Set<ImageFrame>();
    outputs.Tag(kImageTag).Set<ImageFrame>();
  }

  bool use_gpu = false;
#if !MEDIAPIPE_DISABLE_GPU
  // GPU path: GpuBuffer in, GpuBuffer out.
  if (inputs.HasTag(kImageGpuTag)) {
    RET_CHECK(cc->Outputs().HasTag(kImageGpuTag));
    inputs.Tag(kImageGpuTag).Set<GpuBuffer>();
    outputs.Tag(kImageGpuTag).Set<GpuBuffer>();
    use_gpu = true;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  // Gather which crop-size sources are configured. WIDTH/HEIGHT streams and
  // the width/height option pairs only count when both halves are present.
  const auto& options =
      cc->Options<mediapipe::ImageCroppingCalculatorOptions>();
  const bool rect_stream = inputs.HasTag(kRectTag);
  const bool norm_rect_stream = inputs.HasTag(kNormRectTag);
  const bool width_stream = inputs.HasTag(kWidthTag);
  const bool height_stream = inputs.HasTag(kHeightTag);
  const bool norm_size_option =
      options.has_norm_width() && options.has_norm_height();
  const bool size_option = options.has_width() && options.has_height();

  const int flags = static_cast<int>(rect_stream) +
                    static_cast<int>(width_stream && height_stream) +
                    static_cast<int>(norm_rect_stream) +
                    static_cast<int>(norm_size_option) +
                    static_cast<int>(size_option);
  RET_CHECK(flags == 1) << "Illegal combination of input streams/options.";

  // Type whichever optional crop streams are actually connected.
  if (rect_stream) {
    inputs.Tag(kRectTag).Set<Rect>();
  }
  if (norm_rect_stream) {
    inputs.Tag(kNormRectTag).Set<NormalizedRect>();
  }
  if (width_stream) {
    inputs.Tag(kWidthTag).Set<int>();
  }
  if (height_stream) {
    inputs.Tag(kHeightTag).Set<int>();
  }

  // The GPU path additionally lets the GL helper amend the contract.
  if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
    MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
  }

  return absl::OkStatus();
}

// Caches the options, selects the CPU or GPU path, opens the GL helper when
// the GPU path is used, and validates the configured border mode for the
// selected path.
absl::Status ImageCroppingCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  // The presence of the GPU image tag selects the GPU path. The flag is only
  // ever raised here, never lowered.
  use_gpu_ |= cc->Inputs().HasTag(kImageGpuTag);

  options_ = cc->Options<mediapipe::ImageCroppingCalculatorOptions>();

  // Without an explicit cap, the output size limit is effectively unbounded.
  output_max_width_ = FLT_MAX;
  if (options_.has_output_max_width()) {
    output_max_width_ = options_.output_max_width();
  }
  output_max_height_ = FLT_MAX;
  if (options_.has_output_max_height()) {
    output_max_height_ = options_.output_max_height();
  }

  if (!use_gpu_) {
    MP_RETURN_IF_ERROR(ValidateBorderModeForCPU(cc));
    return absl::OkStatus();
  }

#if !MEDIAPIPE_DISABLE_GPU
  MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#else
  RET_CHECK_FAIL() << "GPU processing is for Android and iOS only.";
#endif  // !MEDIAPIPE_DISABLE_GPU

  MP_RETURN_IF_ERROR(ValidateBorderModeForGPU(cc));
  return absl::OkStatus();
}

// Crops the image for the current timestamp on the CPU or GPU path. Nothing
// is emitted when a connected RECT / NORM_RECT stream has no packet at this
// timestamp.
absl::Status ImageCroppingCalculator::Process(CalculatorContext* cc) {
  const auto& inputs = cc->Inputs();

  // A connected but empty rectangle stream means "skip this timestamp".
  if (inputs.HasTag(kRectTag) && inputs.Tag(kRectTag).IsEmpty()) {
    VLOG(1) << "RECT is empty for timestamp: " << cc->InputTimestamp();
    return absl::OkStatus();
  }
  if (inputs.HasTag(kNormRectTag) && inputs.Tag(kNormRectTag).IsEmpty()) {
    VLOG(1) << "NORM_RECT is empty for timestamp: " << cc->InputTimestamp();
    return absl::OkStatus();
  }

  if (!use_gpu_) {
    MP_RETURN_IF_ERROR(RenderCpu(cc));
    return absl::OkStatus();
  }

#if !MEDIAPIPE_DISABLE_GPU
  // GL work runs inside the helper's GL context; the shader program is
  // built on the first call that reaches this point.
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
    if (!gpu_initialized_) {
      MP_RETURN_IF_ERROR(InitGpu(cc));
      gpu_initialized_ = true;
    }
    MP_RETURN_IF_ERROR(RenderGpu(cc));
    return absl::OkStatus();
  }));
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// Deletes the GL shader program (if one was created) and resets the GPU
// initialization flag. Always returns OkStatus().
absl::Status ImageCroppingCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // The program is deleted from within the helper's GL context.
  gpu_helper_.RunInGlContext([this] {
    if (program_ != 0) {
      glDeleteProgram(program_);
    }
    program_ = 0;
  });
  gpu_initialized_ = false;
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// Checks that the configured border mode can be mapped to an OpenCV border
// mode. The mapped value itself is discarded; only the status matters.
absl::Status ImageCroppingCalculator::ValidateBorderModeForCPU(
    CalculatorContext* cc) {
  int discarded_border_mode = 0;
  return GetBorderModeForOpenCV(cc, &discarded_border_mode);
}

// Checks the configured border mode against what the GPU path accepts.
// BORDER_REPLICATE is accepted silently, BORDER_ZERO is accepted with a
// warning about the fallback, and anything else yields a RET_CHECK failure.
absl::Status ImageCroppingCalculator::ValidateBorderModeForGPU(
    CalculatorContext* cc) {
  using CropOptions = mediapipe::ImageCroppingCalculatorOptions;
  const CropOptions options = cc->Options<CropOptions>();
  const auto mode = options.border_mode();

  if (mode == CropOptions::BORDER_ZERO) {
    ABSL_LOG(WARNING)
        << "BORDER_ZERO mode is not supported by GPU "
        << "implementation and will fall back into BORDER_REPLICATE";
  } else if (mode != CropOptions::BORDER_REPLICATE) {
    RET_CHECK_FAIL() << "Unsupported border mode for GPU: "
                     << options.border_mode();
  }

  return absl::OkStatus();
}

// CPU path: warps the (possibly rotated) crop rectangle of the input
// ImageFrame into an upright output ImageFrame, downscaled if necessary to
// respect the configured maximum output size, and emits it at the input
// timestamp. Emits nothing when the IMAGE stream is empty.
absl::Status ImageCroppingCalculator::RenderCpu(CalculatorContext* cc) {
  if (cc->Inputs().Tag(kImageTag).IsEmpty()) {
    return absl::OkStatus();
  }
  const auto& input_img = cc->Inputs().Tag(kImageTag).Get<ImageFrame>();
  cv::Mat input_mat = formats::MatView(&input_img);

  // Resolve the crop rectangle for this frame.
  const RectSpec crop =
      GetCropSpecs(cc, input_img.Width(), input_img.Height());
  const int crop_width = crop.width;
  const int crop_height = crop.height;
  const float center_x = crop.center_x;
  const float center_y = crop.center_y;
  const float rotation = crop.rotation;

  // Translate the configured border mode into its OpenCV counterpart.
  int border_mode = 0;
  MP_RETURN_IF_ERROR(GetBorderModeForOpenCV(cc, &border_mode));

  // Source quadrilateral: corners of the rotated crop rectangle. The rotation
  // is converted from radians to degrees for cv::RotatedRect.
  const cv::RotatedRect min_rect(cv::Point2f(center_x, center_y),
                                 cv::Size2f(crop_width, crop_height),
                                 rotation * 180.f / M_PI);
  cv::Mat src_points;
  cv::boxPoints(min_rect, src_points);

  // Shrink (never enlarge) the output so it fits within the maximum size.
  const float scale =
      std::min({1.0f, output_max_width_ / min_rect.size.width,
                output_max_height_ / min_rect.size.height});
  const float output_width = min_rect.size.width * scale;
  const float output_height = min_rect.size.height * scale;

  // Destination quadrilateral, listed in the order bottom-left, top-left,
  // top-right, bottom-right.
  float dst_corners[8] = {0,            output_height,  //
                          0,            0,              //
                          output_width, 0,              //
                          output_width, output_height};
  const cv::Mat dst_points = cv::Mat(4, 2, CV_32F, dst_corners);

  // The projection is computed from rectangle corners, not from the centers
  // of the corner pixels.
  const cv::Mat projection_matrix =
      cv::getPerspectiveTransform(src_points, dst_points);

  // The projection matrix needs to be adjusted by half a pixel on both sides
  // because `cv::warpPerspective` is based on integer pixel centers.
  // clang-format off
  double to_src_corner[9] = {1.0, 0.0, 0.5,
                             0.0, 1.0, 0.5,
                             0.0, 0.0, 1.0};
  double to_dst_center[9] = {1.0, 0.0, -0.5,
                             0.0, 1.0, -0.5,
                             0.0, 0.0,  1.0};
  // clang-format on
  const cv::Mat shift_src = cv::Mat(3, 3, CV_64F, to_src_corner);
  const cv::Mat shift_dst = cv::Mat(3, 3, CV_64F, to_dst_center);
  const cv::Mat adjusted_projection_matrix =
      shift_dst * projection_matrix * shift_src;

  cv::Mat cropped_image;
  cv::warpPerspective(input_mat, cropped_image, adjusted_projection_matrix,
                      cv::Size(output_width, output_height),
                      /* flags = */ 0,
                      /* borderMode = */ border_mode);

  // Copy the result into a freshly allocated frame of the input's format and
  // hand ownership to the output stream.
  auto output_frame = std::make_unique<ImageFrame>(
      input_img.Format(), cropped_image.cols, cropped_image.rows);
  cv::Mat output_mat = formats::MatView(output_frame.get());
  cropped_image.copyTo(output_mat);
  cc->Outputs().Tag(kImageTag).Add(output_frame.release(),
                                   cc->InputTimestamp());
  return absl::OkStatus();
}

// GPU path: renders the crop rectangle of the input GpuBuffer into a new
// destination texture with the cropping shader and emits the result at the
// input timestamp. Emits nothing when the IMAGE_GPU stream is empty.
//
// Must be called from within the helper's GL context and after InitGpu() has
// built the shader program.
absl::Status ImageCroppingCalculator::RenderGpu(CalculatorContext* cc) {
  if (cc->Inputs().Tag(kImageGpuTag).IsEmpty()) {
    return absl::OkStatus();
  }
#if !MEDIAPIPE_DISABLE_GPU
  const Packet& input_packet = cc->Inputs().Tag(kImageGpuTag).Value();
  const auto& input_buffer = input_packet.Get<mediapipe::GpuBuffer>();
  auto src_tex = gpu_helper_.CreateSourceTexture(input_buffer);

  // Computes the output size and, as a side effect, fills
  // transformed_points_ with the texture coordinates used by GlRender().
  int out_width, out_height;
  GetOutputDimensions(cc, src_tex.width(), src_tex.height(), &out_width,
                      &out_height);
  auto dst_tex = gpu_helper_.CreateDestinationTexture(out_width, out_height);

  // Run cropping shader on GPU.
  {
    gpu_helper_.BindFramebuffer(dst_tex);

    // The shader's `input_frame` sampler is wired to texture unit 1.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(src_tex.target(), src_tex.name());

    GlRender();

    // Unbind on the same unit the source texture was bound to. Selecting a
    // different unit here would leave the source texture bound on unit 1 and
    // clobber an unrelated unit's binding instead.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    glFlush();
  }

  // Send result image in GPU packet.
  auto output = dst_tex.GetFrame<mediapipe::GpuBuffer>();
  cc->Outputs().Tag(kImageGpuTag).Add(output.release(), cc->InputTimestamp());

  // Cleanup
  src_tex.Release();
  dst_tex.Release();
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

void ImageCroppingCalculator::GlRender() {
#if !MEDIAPIPE_DISABLE_GPU
  static const GLfloat square_vertices[] = {
      -1.0f, -1.0f,  // bottom left
      1.0f,  -1.0f,  // bottom right
      -1.0f, 1.0f,   // top left
      1.0f,  1.0f,   // top right
  };
  const GLfloat* texture_vertices = &transformed_points_[0];

  // program
  glUseProgram(program_);

  // vertex storage
  GLuint vbo[2];
  glGenBuffers(2, vbo);
  GLuint vao;
  glGenVertexArrays(1, &vao);
  glBindVertexArray(vao);

  // vbo 0
  glBindBuffer(GL_ARRAY_BUFFER, vbo[0]);
  glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), square_vertices,
               GL_STATIC_DRAW);
  glEnableVertexAttribArray(ATTRIB_VERTEX);
  glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, nullptr);

  // vbo 1
  glBindBuffer(GL_ARRAY_BUFFER, vbo[1]);
  glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), texture_vertices,
               GL_STATIC_DRAW);
  glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
  glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, nullptr);

  // draw
  glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

  // cleanup
  glDisableVertexAttribArray(ATTRIB_VERTEX);
  glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glBindVertexArray(0);
  glDeleteVertexArrays(1, &vao);
  glDeleteBuffers(2, vbo);

#endif  // !MEDIAPIPE_DISABLE_GPU
}

// Builds the pass-through shader program used for cropping and points its
// `input_frame` sampler at texture unit 1. Returns a RET_CHECK failure status
// when no program was produced.
absl::Status ImageCroppingCalculator::InitGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // Attribute names in the vertex shader and the slots they are bound to;
  // the two arrays are parallel.
  const GLchar* attribute_names[NUM_ATTRIBUTES] = {
      "position",
      "texture_coordinate",
  };
  const GLint attribute_slots[NUM_ATTRIBUTES] = {
      ATTRIB_VERTEX,
      ATTRIB_TEXTURE_POSITION,
  };

  // Fragment shader: copies the sampled texel to the output unchanged.
  const GLchar* fragment_source = GLES_VERSION_COMPAT
      R"(
  #if __VERSION__ < 130
    #define in varying
  #endif  // __VERSION__ < 130

  #ifdef GL_ES
    #define fragColor gl_FragColor
    precision highp float;
  #else
    #define lowp
    #define mediump
    #define highp
    #define texture2D texture
    out vec4 fragColor;
  #endif  // defined(GL_ES)

    in vec2 sample_coordinate;
    uniform sampler2D input_frame;

    void main() {
      vec4 pix = texture2D(input_frame, sample_coordinate);
      fragColor = pix;
    }
  )";

  // Compile and link; program_ stays 0 on failure.
  mediapipe::GlhCreateProgram(mediapipe::kBasicVertexShader, fragment_source,
                              NUM_ATTRIBUTES, &attribute_names[0],
                              attribute_slots, &program_);
  RET_CHECK(program_) << "Problem initializing the program.";

  // Bind the sampler uniform to texture unit 1.
  glUseProgram(program_);
  const GLint sampler_location =
      glGetUniformLocation(program_, "input_frame");
  glUniform1i(sampler_location, 1);
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// For GPU only.
//
// Computes the output texture size for the current crop and, as a side
// effect, stores the four crop corners in transformed_points_ as coordinates
// normalized by the source size.
//
// cc:                    context used to resolve the crop rectangle.
// src_width/src_height:  size of the source texture.
// dst_width/dst_height:  receive the output size, each at least 1.
void ImageCroppingCalculator::GetOutputDimensions(CalculatorContext* cc,
                                                  int src_width, int src_height,
                                                  int* dst_width,
                                                  int* dst_height) {
  const RectSpec crop = GetCropSpecs(cc, src_width, src_height);
  const int crop_width = crop.width;
  const int crop_height = crop.height;
  const float x_center = crop.center_x;
  const float y_center = crop.center_y;
  const float rotation = crop.rotation;

  // Crop corners relative to the crop center, before rotation.
  const float half_w = crop_width / 2.0f;
  const float half_h = crop_height / 2.0f;
  const float corner_x[4] = {-half_w, half_w, -half_w, half_w};
  const float corner_y[4] = {-half_h, -half_h, half_h, half_h};

  // Rotate each corner about the center, translate it into the source frame
  // and normalize by the source size.
  const float cos_r = std::cos(rotation);
  const float sin_r = std::sin(rotation);
  for (int corner = 0; corner < 4; ++corner) {
    const float rotated_x = cos_r * corner_x[corner] - sin_r * corner_y[corner];
    const float rotated_y = sin_r * corner_x[corner] + cos_r * corner_y[corner];

    transformed_points_[2 * corner] = (rotated_x + x_center) / src_width;
    transformed_points_[2 * corner + 1] = (rotated_y + y_center) / src_height;
  }

  // Axis-aligned bounds of the transformed rectangle.
  float col_min = transformed_points_[0];
  float col_max = col_min;
  float row_min = transformed_points_[1];
  float row_max = row_min;
  for (int corner = 0; corner < 4; ++corner) {
    const float col = transformed_points_[2 * corner];
    const float row = transformed_points_[2 * corner + 1];
    col_min = std::min(col_min, col);
    col_max = std::max(col_max, col);
    row_min = std::min(row_min, row);
    row_max = std::max(row_max, row);
  }

  int width = static_cast<int>(std::round((col_max - col_min) * src_width));
  int height = static_cast<int>(std::round((row_max - row_min) * src_height));

  // Shrink (never enlarge) to respect the configured maximum output size.
  const float scale =
      std::min({1.0f, output_max_width_ / width, output_max_height_ / height});
  width = static_cast<int>(width * scale);
  height = static_cast<int>(height * scale);

  // Minimum output dimension 1x1 prevents creation of textures with 0x0.
  *dst_width = std::max(1, width);
  *dst_height = std::max(1, height);
}

// Resolves the crop rectangle (size, center, rotation) for a source image of
// the given size from the input streams and options.
//
// Size source, first match wins: RECT stream, NORM_RECT stream, WIDTH+HEIGHT
// streams, width+height options, norm_width+norm_height options. With none of
// them (or an invalid rectangle) the crop is the full, centered, unrotated
// image. Center and rotation options are consulted only when neither
// rectangle stream is connected.
RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc,
                                               int src_width, int src_height) {
  const auto& options =
      cc->Options<mediapipe::ImageCroppingCalculatorOptions>();
  const bool has_rect_stream = cc->Inputs().HasTag(kRectTag);
  const bool has_norm_rect_stream = cc->Inputs().HasTag(kNormRectTag);

  // Defaults: the whole image, centered, without rotation.
  int crop_width = src_width;
  int crop_height = src_height;
  float x_center = src_width / 2.0f;
  float y_center = src_height / 2.0f;
  float rotation = 0.0f;
  // Normalized size, if one is supplied by the inputs or options.
  float normalized_width = 0.0f;
  float normalized_height = 0.0f;

  // Sizes coming from input streams take precedence over the options.
  if (has_rect_stream) {
    const auto& rect = cc->Inputs().Tag(kRectTag).Get<Rect>();
    // A rectangle without a positive size is ignored.
    if (rect.width() > 0 && rect.height() > 0) {
      crop_width = rect.width();
      crop_height = rect.height();
      x_center = rect.x_center();
      y_center = rect.y_center();
      rotation = rect.rotation();
    }
  } else if (has_norm_rect_stream) {
    const auto& norm_rect =
        cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>();
    // A rectangle without a positive size is ignored.
    if (norm_rect.width() > 0.0 && norm_rect.height() > 0.0) {
      normalized_width = norm_rect.width();
      normalized_height = norm_rect.height();
      x_center = norm_rect.x_center() * src_width;
      y_center = norm_rect.y_center() * src_height;
      rotation = norm_rect.rotation();
    }
  } else if (cc->Inputs().HasTag(kWidthTag) &&
             cc->Inputs().HasTag(kHeightTag)) {
    crop_width = cc->Inputs().Tag(kWidthTag).Get<int>();
    crop_height = cc->Inputs().Tag(kHeightTag).Get<int>();
  } else if (options.has_width() && options.has_height()) {
    crop_width = options.width();
    crop_height = options.height();
  } else if (options.has_norm_width() && options.has_norm_height()) {
    normalized_width = options.norm_width();
    normalized_height = options.norm_height();
  }

  // A normalized size is scaled by the source size and rounded.
  if (normalized_width > 0 && normalized_height > 0) {
    crop_width = static_cast<int>(std::round(normalized_width * src_width));
    crop_height = static_cast<int>(std::round(normalized_height * src_height));
  }

  // Rotation and center values from input streams take precedence, so the
  // options are only consulted when neither rectangle stream is connected.
  if (!has_rect_stream && !has_norm_rect_stream) {
    if (options.has_norm_center_x() && options.has_norm_center_y()) {
      x_center = options.norm_center_x() * src_width;
      y_center = options.norm_center_y() * src_height;
    }
    if (options.has_rotation()) {
      rotation = options.rotation();
    }
  }

  if (rotation == 0.0f) {
    // Snap the center to the closest integer when the crop size is even and
    // to the closest half-integer when it is odd.
    const auto snap_center = [](float center, int size) -> float {
      if (size % 2 == 0) {
        return std::round(center);
      }
      return std::round(center + 0.5f) - 0.5f;
    };
    x_center = snap_center(x_center, crop_width);
    y_center = snap_center(y_center, crop_height);
  }

  return {crop_width, crop_height, x_center, y_center, rotation};
}

// Maps the configured border mode to the matching OpenCV border constant:
// BORDER_ZERO -> cv::BORDER_CONSTANT, BORDER_REPLICATE -> cv::BORDER_REPLICATE.
// Any other mode yields a RET_CHECK failure and leaves *border_mode untouched.
absl::Status ImageCroppingCalculator::GetBorderModeForOpenCV(
    CalculatorContext* cc, int* border_mode) {
  using CropOptions = mediapipe::ImageCroppingCalculatorOptions;
  const CropOptions options = cc->Options<CropOptions>();
  const auto mode = options.border_mode();

  if (mode == CropOptions::BORDER_ZERO) {
    *border_mode = cv::BORDER_CONSTANT;
  } else if (mode == CropOptions::BORDER_REPLICATE) {
    *border_mode = cv::BORDER_REPLICATE;
  } else {
    RET_CHECK_FAIL() << "Unsupported border mode for CPU: "
                     << options.border_mode();
  }

  return absl::OkStatus();
}

}  // namespace mediapipe