chromium/third_party/mediapipe/src/mediapipe/calculators/tensor/tensor_converter_calculator.cc

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/types/optional.h"
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensor_converter_cpu.h"
#include "mediapipe/calculators/tensor/tensor_converter_gpu.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/memory_manager.h"
#include "mediapipe/framework/memory_manager_service.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/gpu/gpu_origin.pb.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_origin.pb.h"

#if MEDIAPIPE_METAL_ENABLED
#include "mediapipe/calculators/tensor/tensor_converter_metal.h"
#import "mediapipe/gpu/MPPMetalHelper.h"
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/gpu/gl_calculator_helper.h"

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include "mediapipe/calculators/tensor/tensor_converter_gl31.h"
#else
#include "mediapipe/calculators/tensor/tensor_converter_gl30.h"
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace {

// Commonly used to compute the number of blocks to launch in a kernel.
// Returns how many groups of `group_size` elements are needed to cover `size`
// elements, i.e. `size / group_size` rounded up for positive arguments.
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) {  // NOLINT
  // Bias the numerator so that the truncating integer division rounds up.
  const int biased_size = size + group_size - 1;
  return biased_size / group_size;
}

// Decides whether the input image has to be flipped vertically, based on the
// `flip_vertically` and `gpu_origin` calculator options.
//
// Returns:
//  - FailedPreconditionError if both flip_vertically and gpu_origin are set.
//  - The value of flip_vertically if gpu_origin is not set.
//  - false (after logging a warning) if gpu_origin is set but `use_gpu` is
//    false.
//  - false for TOP_LEFT; for DEFAULT/CONVENTIONAL false when __APPLE__ is
//    defined and true otherwise.
//  - InvalidArgumentError for any other gpu_origin value.
absl::StatusOr<bool> ShouldFlipVertically(
    const mediapipe::TensorConverterCalculatorOptions& options, bool use_gpu) {
  // Flip decision for the DEFAULT/CONVENTIONAL origin:
  // TOP_LEFT on Metal, BOTTOM_LEFT on OpenGL.
#ifdef __APPLE__
  constexpr bool kFlipForConventionalOrigin = false;
#else
  constexpr bool kFlipForConventionalOrigin = true;
#endif

  const bool has_gpu_origin = options.has_gpu_origin();
  if (has_gpu_origin && options.has_flip_vertically()) {
    return absl::FailedPreconditionError(
        "Cannot specify both flip_vertically and gpu_origin options");
  }

  // Without gpu_origin the decision falls back to flip_vertically.
  if (!has_gpu_origin) {
    return options.flip_vertically();
  }

  // gpu_origin together with a CPU input image only triggers a warning.
  // Those images are always TOP_LEFT, so no flipping is necessary.
  if (!use_gpu) {
    ABSL_LOG(WARNING)
        << "Ignoring gpu_origin option since IMAGE_GPU input is not specified";
    return false;
  }

  switch (options.gpu_origin()) {
    case mediapipe::GpuOrigin::TOP_LEFT:
      return false;
    case mediapipe::GpuOrigin::DEFAULT:
    case mediapipe::GpuOrigin::CONVENTIONAL:
      return kFlipForConventionalOrigin;
    default:
      return absl::InvalidArgumentError(
          absl::StrFormat("Unhandled GPU origin %i", options.gpu_origin()));
  }
}

// Input stream tag carrying an ImageFrame (CPU image).
constexpr char kImageFrameTag[] = "IMAGE";
// Input stream tag carrying a GpuBuffer; its presence switches the calculator
// to the GPU conversion path.
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
// Output stream tag carrying a std::vector<Tensor> with a single element.
constexpr char kTensorsTag[] = "TENSORS";
// Output stream tag carrying a single Tensor.
constexpr char kTensorTag[] = "TENSOR";
// Input stream tag carrying a Matrix.
constexpr char kMatrixTag[] = "MATRIX";

// Output range handed to the CPU image conversion when the options did not
// configure any output range.
constexpr std::pair<float, float> kDefaultOutputRange = {0.0f, 1.0f};

}  // namespace

namespace mediapipe {

// Calculator for normalizing and converting an ImageFrame, GpuBuffer or Matrix
// into a Tensor.
//
// This calculator is designed to be used with the TfLiteInferenceCalculator,
// as a pre-processing step for calculator inputs.
//
// IMAGE and IMAGE_GPU inputs are normalized to [-1,1] (default) or [0,1],
// specified by options (unless outputting a quantized tensor).
//
// Input:
//  One of the following tags:
//  IMAGE - ImageFrame (assumed to be 8-bit or 32-bit data).
//  IMAGE_GPU - GpuBuffer (assumed to be RGBA or RGB GL texture).
//  MATRIX - Matrix.
//
// Output:
//  One of the following tags:
//  TENSORS - Vector of Tensors of type kFloat32. The resource type used:
//          - MTLBuffer if Metal API is available
//          - SSBO if Metal is unavailable and OpenGL ES 3.1 is available
//          - Texture2D if Metal and GLES 3.1 are not available and GLES 3.0 is.
//  TENSOR  - Tensor of type kFloat32. Resource type same as in TENSORS
//
// Example use:
// node {
//   calculator: "TensorConverterCalculator"
//   input_stream: "IMAGE:input_image"
//   output_stream: "TENSORS:image_tensor"
//   options: {
//     [mediapipe.TensorConverterCalculatorOptions.ext] {
//       zero_center: true
//     }
//   }
// }
//
// IMPORTANT Notes:
//  GPU tensors are currently only supported on mobile platforms.

class TensorConverterCalculator : public CalculatorBase {
 public:
  // Validates the connected stream tags and declares their packet types.
  static absl::Status GetContract(CalculatorContract* cc);

  // Picks up the optional memory manager, opens the GPU helper when IMAGE_GPU
  // is connected and loads the calculator options.
  absl::Status Open(CalculatorContext* cc) override;
  // Converts the current input into a Tensor and emits it at the input
  // timestamp; nothing is emitted when the input packet is empty.
  absl::Status Process(CalculatorContext* cc) override;
  // Releases the GPU converter when the GPU path was in use.
  absl::Status Close(CalculatorContext* cc) override;

 private:
  // Validates the GPU input format and creates tensor_converter_gpu_.
  absl::Status InitGpu(CalculatorContext* cc);
  // Copies the calculator options into the member fields below.
  absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu);
  // Converts an IMAGE or MATRIX input on the CPU; yields std::nullopt when the
  // input packet is empty.
  absl::StatusOr<std::optional<Tensor>> ProcessCPU(CalculatorContext* cc);
  // Converts an IMAGE_GPU input, initializing the GPU converter on first use.
  absl::StatusOr<std::optional<Tensor>> ProcessGPU(CalculatorContext* cc);

  // Platform GPU helper; only declared for Metal or OpenGL ES >= 3.0 builds.
#if MEDIAPIPE_METAL_ENABLED
  MPPMetalHelper* gpu_helper_ = nullptr;
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  GlCalculatorHelper gpu_helper_;
#endif  // MEDIAPIPE_METAL_ENABLED
  // Set once InitGpu has succeeded, so it only runs for the first GPU input.
  bool initialized_ = false;
  // True when the IMAGE_GPU input is connected (and GPU support is compiled
  // in).
  bool use_gpu_ = false;
  // Output value range configured through the options; empty when none of the
  // range-related options applied.
  std::optional<std::pair<float, float>> output_range_;
  // Whether the image is flipped vertically during conversion.
  bool flip_vertically_ = false;
  // Value of the row_major_matrix option, forwarded to the Matrix conversion.
  bool row_major_matrix_ = false;
  // Value of the max_num_channels option; LoadOptions accepts 1, 3 or 4.
  int max_num_channels_ = 3;

  // Platform-specific GPU converter, created by InitGpu.
  std::unique_ptr<TensorConverterGpu> tensor_converter_gpu_;

  // Enable pooling of AHWBs in Tensor instances.
  // Stays nullptr when the memory manager service is not available.
  MemoryManager* memory_manager_ = nullptr;
};
REGISTER_CALCULATOR(TensorConverterCalculator);

// Declares the calculator contract: exactly one of the IMAGE, IMAGE_GPU or
// MATRIX inputs and exactly one of the TENSORS or TENSOR outputs must be
// connected. Also requests the (optional) memory manager service and, for
// IMAGE_GPU inputs, lets the platform GPU helper update the contract.
absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) {
  // Exactly one of the supported input streams has to be connected.
  RET_CHECK(static_cast<int>(cc->Inputs().HasTag(kImageFrameTag)) +
                static_cast<int>(cc->Inputs().HasTag(kGpuBufferTag)) +
                static_cast<int>(cc->Inputs().HasTag(kMatrixTag)) ==
            1)
      << "Only one input tag of {IMAGE, IMAGE_GPU, MATRIX} may be specified";

  auto& input_streams = cc->Inputs();
  if (input_streams.HasTag(kImageFrameTag)) {
    input_streams.Tag(kImageFrameTag).Set<ImageFrame>();
  }
  if (input_streams.HasTag(kMatrixTag)) {
    input_streams.Tag(kMatrixTag).Set<Matrix>();
  }

  // The calculator also works without a memory manager.
  cc->UseService(kMemoryManagerService).Optional();

#if !MEDIAPIPE_DISABLE_GPU
  if (input_streams.HasTag(kGpuBufferTag)) {
    input_streams.Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
#if MEDIAPIPE_METAL_ENABLED
    MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
    MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  RET_CHECK(cc->Outputs().HasTag(kTensorsTag) ^
            cc->Outputs().HasTag(kTensorTag))
      << "One and only one of TENSOR or TENSORS should be set";

  // The check above guarantees that exactly one of the two tags is present.
  auto& output_streams = cc->Outputs();
  if (output_streams.HasTag(kTensorsTag)) {
    output_streams.Tag(kTensorsTag).Set<std::vector<Tensor>>();
  } else {
    output_streams.Tag(kTensorTag).Set<Tensor>();
  }

  return absl::OkStatus();
}

// Prepares the calculator for processing: sets a zero timestamp offset, picks
// up the memory manager if the service is available, opens the platform GPU
// helper when the IMAGE_GPU input is connected and finally loads the options.
absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  // The memory manager is optional; memory_manager_ stays null without it.
  auto memory_manager_service = cc->Service(kMemoryManagerService);
  if (memory_manager_service.IsAvailable()) {
    memory_manager_ = &memory_manager_service.GetObject();
  }

#if !MEDIAPIPE_DISABLE_GPU
  const bool has_gpu_input = cc->Inputs().HasTag(kGpuBufferTag);
  if (has_gpu_input) {
    use_gpu_ = true;
#if MEDIAPIPE_METAL_ENABLED
    gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
    RET_CHECK(gpu_helper_);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
    MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  // Options are loaded last because the flip decision depends on use_gpu_.
  MP_RETURN_IF_ERROR(LoadOptions(cc, use_gpu_));

  return absl::OkStatus();
}

// Converts the current input packet into a Tensor and emits it on the
// connected output stream (TENSORS as a one-element vector, TENSOR as a single
// tensor) at the input timestamp. Nothing is emitted for an empty input.
absl::Status TensorConverterCalculator::Process(CalculatorContext* cc) {
  std::optional<Tensor> maybe_tensor;
  if (use_gpu_) {
    // Convert to GPU tensors type, unless there is no GPU input packet.
    if (!cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
      MP_ASSIGN_OR_RETURN(maybe_tensor, ProcessGPU(cc));
    }
  } else {
    // Convert to CPU tensors or Matrix type.
    MP_ASSIGN_OR_RETURN(maybe_tensor, ProcessCPU(cc));
  }

  if (!maybe_tensor.has_value()) {
    return absl::OkStatus();
  }

  if (cc->Outputs().HasTag(kTensorsTag)) {
    auto tensors = std::make_unique<std::vector<Tensor>>();
    tensors->push_back(std::move(*maybe_tensor));
    // The output stream takes ownership of the released pointer.
    cc->Outputs()
        .Tag(kTensorsTag)
        .Add(tensors.release(), cc->InputTimestamp());
  } else {
    auto tensor = std::make_unique<Tensor>(std::move(*maybe_tensor));
    cc->Outputs().Tag(kTensorTag).Add(tensor.release(), cc->InputTimestamp());
  }
  return absl::OkStatus();
}

// Releases the GPU converter if the GPU path was used. On OpenGL builds the
// release is executed inside the GL context; on Metal builds it is released
// directly. Always returns OkStatus.
absl::Status TensorConverterCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  if (!use_gpu_) {
    return absl::OkStatus();
  }
#if MEDIAPIPE_METAL_ENABLED
  tensor_converter_gpu_.reset();
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  gpu_helper_.RunInGlContext([this] { tensor_converter_gpu_.reset(); });
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}

// Converts the IMAGE (ImageFrame) or MATRIX (Matrix) input into a Tensor on
// the CPU. Yields std::nullopt when the connected input has no packet at the
// current timestamp or when neither tag is connected; conversion errors are
// propagated.
absl::StatusOr<std::optional<Tensor>> TensorConverterCalculator::ProcessCPU(
    CalculatorContext* cc) {
  auto& input_streams = cc->Inputs();

  if (input_streams.HasTag(kImageFrameTag)) {
    auto& image_stream = input_streams.Tag(kImageFrameTag);
    if (image_stream.IsEmpty()) {
      return std::nullopt;
    }
    // Use the default range when the options did not configure one.
    const std::pair<float, float> range =
        output_range_.value_or(kDefaultOutputRange);
    MP_ASSIGN_OR_RETURN(
        Tensor tensor,
        ConvertImageFrameToTensorOnCpu(image_stream.Get<ImageFrame>(), range,
                                       flip_vertically_, max_num_channels_,
                                       memory_manager_));
    return std::move(tensor);
  }

  if (input_streams.HasTag(kMatrixTag)) {
    auto& matrix_stream = input_streams.Tag(kMatrixTag);
    if (matrix_stream.IsEmpty()) {
      return std::nullopt;
    }
    MP_ASSIGN_OR_RETURN(
        Tensor tensor,
        ConvertMatrixToTensorOnCpu(matrix_stream.Get<Matrix>(),
                                   row_major_matrix_, memory_manager_));
    return std::move(tensor);
  }

  return std::nullopt;
}

// Converts the IMAGE_GPU (GpuBuffer) input into a Tensor using the
// platform-specific GPU converter.
//
// The converter is created by InitGpu on the first call; an InitGpu failure is
// returned and initialization is attempted again on the next call. When GPU
// support is compiled out, the function always fails with a RET_CHECK error.
absl::StatusOr<std::optional<Tensor>> TensorConverterCalculator::ProcessGPU(
    CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // One-time initialization, guarded by initialized_.
  if (!initialized_) {
    MP_RETURN_IF_ERROR(InitGpu(cc));
    initialized_ = true;
  }
  const auto& input =
      cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
#if MEDIAPIPE_METAL_ENABLED
  // Metal: the conversion is invoked directly.
  Tensor output = tensor_converter_gpu_->Convert(input);
  return std::move(output);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  // OpenGL: the conversion has to run inside the GL context; the result is
  // handed back through `output`.
  std::optional<Tensor> output;
  MP_RETURN_IF_ERROR(
      gpu_helper_.RunInGlContext([this, &output, &input]() -> absl::Status {
        output = tensor_converter_gpu_->Convert(input);
        return absl::OkStatus();
      }));
  return std::move(output);
#endif  // MEDIAPIPE_METAL_ENABLED
#else
  RET_CHECK_FAIL() << "GPU processing is not enabled.";
#endif  // !MEDIAPIPE_DISABLE_GPU

  // Only reached when GPU support is enabled but neither the Metal nor the
  // OpenGL ES >= 3.0 branch above was compiled in.
  return std::nullopt;
}

// Validates the format of the current IMAGE_GPU input and creates the
// platform-specific converter stored in tensor_converter_gpu_.
//
// Returns a RET_CHECK error for unsupported input formats, or when 4 output
// channels are requested but the input format is not one of the RGBA/BGRA
// formats checked below. Errors from the converter factories are propagated.
// Does nothing and returns OkStatus when GPU support is compiled out.
absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // Inspect the current input buffer: its format is validated below and, on
  // OpenGL builds, its width and height are passed to the converter factory.
  const auto& input =
      cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
  mediapipe::GpuBufferFormat format = input.format();
  // Channel handling derived from the max_num_channels option.
  const bool include_alpha = (max_num_channels_ == 4);
  const bool single_channel = (max_num_channels_ == 1);

  RET_CHECK(format == mediapipe::GpuBufferFormat::kBGRA32 ||
            format == mediapipe::GpuBufferFormat::kRGB24 ||
            format == mediapipe::GpuBufferFormat::kRGBA32 ||
            format == mediapipe::GpuBufferFormat::kRGBAFloat128 ||
            format == mediapipe::GpuBufferFormat::kRGBAHalf64 ||
            format == mediapipe::GpuBufferFormat::kGrayFloat32 ||
            format == mediapipe::GpuBufferFormat::kGrayHalf16 ||
            format == mediapipe::GpuBufferFormat::kOneComponent8)
      << "Unsupported GPU input format: " << static_cast<uint32_t>(format);
  // An alpha channel can only be output if the input format provides one.
  if (include_alpha) {
    RET_CHECK(format == mediapipe::GpuBufferFormat::kBGRA32 ||
              format == mediapipe::GpuBufferFormat::kRGBA32 ||
              format == mediapipe::GpuBufferFormat::kRGBAFloat128 ||
              format == mediapipe::GpuBufferFormat::kRGBAHalf64)
        << "Num input channels is less than desired output, input format: "
        << static_cast<uint32_t>(format);
  }

#if MEDIAPIPE_METAL_ENABLED
  // Metal: the converter is created directly from the Metal helper.
  MP_ASSIGN_OR_RETURN(
      tensor_converter_gpu_,
      CreateTensorConverterMetal(gpu_helper_, memory_manager_, output_range_,
                                 include_alpha, single_channel,
                                 flip_vertically_, max_num_channels_));
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  // OpenGL: the converter is created inside the GL context. The by-reference
  // captures are only used while RunInGlContext executes the lambda.
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
      [this, &input, &include_alpha, &single_channel]() -> absl::Status {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
        // OpenGL ES 3.1 or newer.
        MP_ASSIGN_OR_RETURN(
            tensor_converter_gpu_,
            CreateTensorConverterGl31(
                gpu_helper_, memory_manager_, input.width(), input.height(),
                output_range_, include_alpha, single_channel, flip_vertically_,
                max_num_channels_));
#else
        // OpenGL ES 3.0.
        MP_ASSIGN_OR_RETURN(
            tensor_converter_gpu_,
            CreateTensorConverterGl30(
                gpu_helper_, memory_manager_, input.width(), input.height(),
                output_range_, include_alpha, single_channel, flip_vertically_,
                max_num_channels_));
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
        return absl::OkStatus();
      }));
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}

// Reads the TensorConverterCalculatorOptions of the graph node and stores the
// derived settings in the member fields.
//
// The output range is determined by the following options, where a later one
// overrides an earlier one:
//   1. zero_center                -> [-1, 1]
//   2. output_tensor_float_range  -> [min, max]
//   3. use_custom_normalization   -> [-custom_sub,
//                                     -custom_sub + 255 / custom_div]
// If none of them applies, output_range_ stays empty.
//
// Args:
//   cc: calculator context providing the options.
//   use_gpu: whether the IMAGE_GPU input is used; affects the flip decision.
//
// Returns an error status (instead of aborting the process) when the options
// are invalid: output_tensor_float_range with max <= min, conflicting or
// unhandled flip settings, or max_num_channels other than 1, 3 or 4.
absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc,
                                                    bool use_gpu) {
  // Get calculator options specified in the graph.
  const auto& options =
      cc->Options<::mediapipe::TensorConverterCalculatorOptions>();

  // if zero_center, set output float range to match [-1, 1] as specified in
  // calculator proto.
  if (options.zero_center()) {
    output_range_.emplace(-1.0f, 1.0f);
  }

  // Custom output_tensor_float_range values.
  // If the float range is specified in pb text, use the specified values
  // instead.
  if (options.has_output_tensor_float_range()) {
    const float range_min = options.output_tensor_float_range().min();
    const float range_max = options.output_tensor_float_range().max();
    // Validate before storing, and report a bad graph config as a status.
    RET_CHECK(range_max > range_min)
        << "output_tensor_float_range max (" << range_max
        << ") must be greater than min (" << range_min << ")";
    output_range_.emplace(range_min, range_max);
  }

  // Custom div and sub values.
  // NOTE(review): custom_div is not validated here; a value of 0 produces a
  // non-finite range - confirm whether that should be rejected.
  if (options.use_custom_normalization()) {
    output_range_.emplace(std::pair<float, float>(
        -options.custom_sub(),
        -options.custom_sub() + 255.0 / options.custom_div()));
  }

  // Get y-flip mode.
  MP_ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu));

  // Get row_major_matrix mode.
  row_major_matrix_ = options.row_major_matrix();

  // Get desired way to handle input channels. Only 1, 3 or 4 channels are
  // supported.
  const int requested_num_channels = options.max_num_channels();
  RET_CHECK(requested_num_channels == 1 || requested_num_channels == 3 ||
            requested_num_channels == 4)
      << "max_num_channels must be 1, 3 or 4, but is "
      << requested_num_channels;
  max_num_channels_ = requested_num_channels;
  return absl::OkStatus();
}

}  // namespace mediapipe