chromium/third_party/mediapipe/src/mediapipe/calculators/tensor/image_to_tensor_utils.cc

// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"

#include <array>
#include <cmath>
#include <memory>
#include <utility>

#include "absl/status/statusor.h"
#include "absl/types/optional.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

// Builds the region of interest, in absolute image coordinates, for an input
// image of `input_width` x `input_height`.
//
// When `norm_rect` is set, its normalized center and size are scaled by the
// image dimensions and its rotation is carried over unchanged. When it is not
// set, the ROI is the entire image: centered, full size, no rotation.
RotatedRect GetRoi(int input_width, int input_height,
                   absl::optional<mediapipe::NormalizedRect> norm_rect) {
  if (!norm_rect.has_value()) {
    // No rectangle supplied: cover the whole image.
    return {/*center_x=*/0.5f * input_width,
            /*center_y=*/0.5f * input_height,
            /*width=*/static_cast<float>(input_width),
            /*height=*/static_cast<float>(input_height),
            /*rotation=*/0};
  }

  const mediapipe::NormalizedRect& rect = *norm_rect;
  return {/*center_x=*/rect.x_center() * input_width,
          /*center_y=*/rect.y_center() * input_height,
          /*width=*/rect.width() * input_width,
          /*height=*/rect.height() * input_height,
          /*rotation=*/rect.rotation()};
}

// Grows `roi` (in place) so that its height/width ratio matches the ratio of
// the input tensor, and reports how much padding that growth introduced.
//
// Returns {horizontal, vertical, horizontal, vertical} padding, each expressed
// as a fraction of the resized ROI dimension. When `keep_aspect_ratio` is
// false nothing is validated, `roi` is left untouched and all paddings are
// zero. Otherwise fails if the tensor dimensions or the ROI dimensions are
// not strictly positive.
absl::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
                                            int input_tensor_height,
                                            bool keep_aspect_ratio,
                                            RotatedRect* roi) {
  if (!keep_aspect_ratio) {
    return std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f};
  }

  RET_CHECK(input_tensor_width > 0 && input_tensor_height > 0)
      << "Input tensor width and height must be > 0.";
  const float tensor_aspect_ratio =
      static_cast<float>(input_tensor_height) / input_tensor_width;

  RET_CHECK(roi->width > 0 && roi->height > 0)
      << "ROI width and height must be > 0.";
  // Captured before `roi` is modified below.
  const float roi_aspect_ratio = roi->height / roi->width;

  if (tensor_aspect_ratio > roi_aspect_ratio) {
    // The tensor is relatively taller than the ROI: keep the width and
    // stretch the height, which pads the top and bottom.
    const float vertical_padding =
        (1.0f - roi_aspect_ratio / tensor_aspect_ratio) / 2.0f;
    roi->height = roi->width * tensor_aspect_ratio;
    return std::array<float, 4>{0.0f, vertical_padding, 0.0f,
                                vertical_padding};
  }

  // Otherwise keep the height and stretch the width, which pads left and
  // right (the padding is zero when the ratios are already equal).
  const float horizontal_padding =
      (1.0f - tensor_aspect_ratio / roi_aspect_ratio) / 2.0f;
  roi->width = roi->height / tensor_aspect_ratio;
  return std::array<float, 4>{horizontal_padding, 0.0f, horizontal_padding,
                              0.0f};
}

// Computes the linear mapping `y = x * scale + offset` that sends
// [from_range_min, from_range_max] onto [to_range_min, to_range_max].
//
// Fails when either range is empty or inverted (min >= max).
absl::StatusOr<ValueTransformation> GetValueRangeTransformation(
    float from_range_min, float from_range_max, float to_range_min,
    float to_range_max) {
  RET_CHECK_LT(from_range_min, from_range_max)
      << "Invalid FROM range: min >= max.";
  RET_CHECK_LT(to_range_min, to_range_max) << "Invalid TO range: min >= max.";

  const float to_span = to_range_max - to_range_min;
  const float from_span = from_range_max - from_range_min;
  const float scale = to_span / from_span;
  // Chosen so that from_range_min maps exactly onto to_range_min.
  const float offset = to_range_min - from_range_min * scale;
  return ValueTransformation{scale, offset};
}

// Fills `*matrix_ptr` with the 4x4 matrix (stored row by row) that maps
// normalized [0, 1] coordinates inside `sub_rect` to normalized [0, 1]
// coordinates of the enclosing rect of size `rect_width` x `rect_height`.
//
// The result is the closed form of the product, applied right to left, of:
//
//   post_scale * translate * rotate * flip * scale * initial_translate
//
//   initial_translate  shifts X,Y by -0.5, i.e. into the [-0.5, 0.5] range:
//                        { 1, 0, 0, -0.5 }
//                        { 0, 1, 0, -0.5 }
//                        { 0, 0, 1,  0   }
//                        { 0, 0, 0,  1   }
//   scale              scales to the sub rect size; Z uses the X scale:
//                        diag(sub_w, sub_h, sub_w, 1)
//   flip               optional horizontal flip around the middle:
//                        diag(flip, 1, 1, 1), flip = -1 or 1
//   rotate             rotation around the Z axis by sub_rect.rotation:
//                        { cos, -sin, 0, 0 }
//                        { sin,  cos, 0, 0 }
//                        { 0,    0,   1, 0 }
//                        { 0,    0,   0, 1 }
//   translate          moves X,Y to the sub rect center inside the parent:
//                        { 1, 0, 0, center_x }
//                        { 0, 1, 0, center_y }
//                        { 0, 0, 1, 0        }
//                        { 0, 0, 0, 1        }
//   post_scale         brings X,Y,Z back to the [0, 1] range:
//                        diag(1/rect_width, 1/rect_height, 1/rect_width, 1)
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
                                            int rect_width, int rect_height,
                                            bool flip_horizontally,
                                            std::array<float, 16>* matrix_ptr) {
  const float sub_w = sub_rect.width;
  const float sub_h = sub_rect.height;
  const float flip = flip_horizontally ? -1.0f : 1.0f;
  const float cos_r = std::cos(sub_rect.rotation);
  const float sin_r = std::sin(sub_rect.rotation);
  const float center_x = sub_rect.center_x;
  const float center_y = sub_rect.center_y;
  const float inv_w = 1.0f / rect_width;
  const float inv_h = 1.0f / rect_height;

  *matrix_ptr = {
      // row 1
      sub_w * cos_r * flip * inv_w,
      -sub_h * sin_r * inv_w,
      0.0f,
      (-0.5f * sub_w * cos_r * flip + 0.5f * sub_h * sin_r + center_x) * inv_w,
      // row 2
      sub_w * sin_r * flip * inv_h,
      sub_h * cos_r * inv_h,
      0.0f,
      (-0.5f * sub_h * cos_r - 0.5f * sub_w * sin_r * flip + center_y) * inv_h,
      // row 3
      0.0f,
      0.0f,
      sub_w * inv_w,
      0.0f,
      // row 4
      0.0f,
      0.0f,
      0.0f,
      1.0f,
  };
}

// Fills `*matrix_ptr` with the transpose of the matrix produced by
// GetRotatedSubRectToRectTransformMatrix for the same arguments: the value
// stored here at index 4 * i + j is the value that function stores at index
// 4 * j + i. See that function for how the individual terms are derived.
void GetTransposedRotatedSubRectToRectTransformMatrix(
    const RotatedRect& sub_rect, int rect_width, int rect_height,
    bool flip_horizontally, std::array<float, 16>* matrix_ptr) {
  const float sub_w = sub_rect.width;
  const float sub_h = sub_rect.height;
  const float flip = flip_horizontally ? -1.0f : 1.0f;
  const float cos_r = std::cos(sub_rect.rotation);
  const float sin_r = std::sin(sub_rect.rotation);
  const float center_x = sub_rect.center_x;
  const float center_y = sub_rect.center_y;
  const float inv_w = 1.0f / rect_width;
  const float inv_h = 1.0f / rect_height;

  *matrix_ptr = {
      // row 1 (indices 0, 4, 8, 12 of the non-transposed matrix)
      sub_w * cos_r * flip * inv_w,
      sub_w * sin_r * flip * inv_h,
      0.0f,
      0.0f,
      // row 2 (indices 1, 5, 9, 13 of the non-transposed matrix)
      -sub_h * sin_r * inv_w,
      sub_h * cos_r * inv_h,
      0.0f,
      0.0f,
      // row 3 (indices 2, 6, 10, 14 of the non-transposed matrix)
      0.0f,
      0.0f,
      sub_w * inv_w,
      0.0f,
      // row 4 (indices 3, 7, 11, 15 of the non-transposed matrix)
      (-0.5f * sub_w * cos_r * flip + 0.5f * sub_h * sin_r + center_x) * inv_w,
      (-0.5f * sub_h * cos_r - 0.5f * sub_w * sin_r * flip + center_y) * inv_h,
      0.0f,
      1.0f,
  };
}

// Translates the border mode from ImageToTensorCalculatorOptions into the
// BorderMode enum used by this file's callers.
//
// BORDER_UNSPECIFIED is treated the same as BORDER_REPLICATE. Any value not
// matched by a case below also yields BorderMode::kReplicate.
BorderMode GetBorderMode(
    const mediapipe::ImageToTensorCalculatorOptions::BorderMode& mode) {
  // Deliberately no `default:` label, so the cases stay explicit.
  switch (mode) {
    case mediapipe::
        ImageToTensorCalculatorOptions_BorderMode_BORDER_UNSPECIFIED:
      return BorderMode::kReplicate;
    case mediapipe::ImageToTensorCalculatorOptions_BorderMode_BORDER_ZERO:
      return BorderMode::kZero;
    case mediapipe::ImageToTensorCalculatorOptions_BorderMode_BORDER_REPLICATE:
      return BorderMode::kReplicate;
  }
  // Reached only if `mode` holds a value not matched by any case above.
  // Flowing off the end of a value-returning function is undefined behaviour,
  // so fall back to the same result as BORDER_UNSPECIFIED.
  return BorderMode::kReplicate;
}

// Chooses the element type of the output tensor.
//
// With GPU in use the result is always float32. On CPU it is float32 when
// `params.is_float_output` is set; otherwise it is int8 when
// `params.range_min` is negative and uint8 when it is not.
Tensor::ElementType GetOutputTensorType(bool uses_gpu,
                                        const OutputTensorParams& params) {
  if (uses_gpu) {
    // Always use float32 when GPU is enabled.
    return Tensor::ElementType::kFloat32;
  }
  if (params.is_float_output) {
    return Tensor::ElementType::kFloat32;
  }
  // Quantized output: signedness follows the sign of the range minimum.
  return params.range_min < 0 ? Tensor::ElementType::kInt8
                              : Tensor::ElementType::kUInt8;
}

// Returns the number of channels the output tensor should have for `image`:
//   * 4 for a GPU-backed image when GPU support is compiled in and Metal is
//     enabled;
//   * 1 for a CPU-backed image that has exactly one channel;
//   * 3 in every other case.
int GetNumOutputChannels(const mediapipe::Image& image) {
#if !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED
  if (image.UsesGpu()) {
    return 4;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED
  // TODO: Add a unittest here to test the behavior on GPU, i.e.
  // failure.
  // A single output channel is produced only for single-channel images that
  // live on the CPU. Supporting one output channel on GPU would be desirable
  // too, but it is left out to stay on the safe side and avoid unintentional
  // failures.
  const bool single_channel_on_cpu =
      !image.UsesGpu() && image.channels() == 1;
  return single_channel_on_cpu ? 1 : 3;
}

// Returns the content of `image_packet` as a shared, read-only Image.
//
// The packet may hold either an Image or an ImageFrame. An Image is shared
// directly; an ImageFrame is shared and then wrapped in a newly created
// Image. Any error from sharing the packet content is propagated.
absl::StatusOr<std::shared_ptr<const mediapipe::Image>> GetInputImage(
    const api2::Packet<api2::OneOf<Image, mediapipe::ImageFrame>>&
        image_packet) {
  // Visitor for the case where the packet already carries an Image.
  const auto share_image = [&image_packet](const mediapipe::Image&) {
    return image_packet.Share<mediapipe::Image>();
  };

  // Visitor for the case where the packet carries an ImageFrame.
  const auto wrap_image_frame =
      [&image_packet](const mediapipe::ImageFrame&)
      -> absl::StatusOr<std::shared_ptr<const mediapipe::Image>> {
    MP_ASSIGN_OR_RETURN(
        std::shared_ptr<const mediapipe::ImageFrame> shared_frame,
        image_packet.Share<mediapipe::ImageFrame>());
    // The const qualifier is cast away before the frame is handed to Image;
    // presumably Image's constructor takes a pointer to a non-const
    // ImageFrame. The Image itself is still exposed as const.
    return std::make_shared<const mediapipe::Image>(
        std::const_pointer_cast<mediapipe::ImageFrame>(
            std::move(shared_frame)));
  };

  return image_packet.Visit(share_image, wrap_image_frame);
}

#if !MEDIAPIPE_DISABLE_GPU
// Returns a shared, read-only Image constructed from the GpuBuffer held by
// `image_gpu_packet`.
absl::StatusOr<std::shared_ptr<const mediapipe::Image>> GetInputImage(
    const api2::Packet<mediapipe::GpuBuffer>& image_gpu_packet) {
  // The Image is created from a shallow copy of the buffer. That is fine
  // because the resulting image is only used locally in Process() and thus
  // never outlives the input packet.
  const auto& gpu_buffer = image_gpu_packet.Get();
  return std::make_shared<const mediapipe::Image>(gpu_buffer);
}
#endif  // !MEDIAPIPE_DISABLE_GPU

}  // namespace mediapipe