chromium/third_party/mediapipe/src/mediapipe/calculators/tensor/tensor_converter_cpu.cc

// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/tensor_converter_cpu.h"

#include <algorithm>
#include <cstdint>
#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/memory_manager.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"

namespace mediapipe {
namespace {

typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
    RowMajorMatrixXf;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
    ColMajorMatrixXf;

template <class T>
absl::Status NormalizeImage(const ImageFrame& image_frame, bool flip_vertically,
                            const std::pair<float, float>& output_range,
                            int max_num_channels, float* tensor_ptr) {
  const int height = image_frame.Height();
  const int width = image_frame.Width();
  const int channels = image_frame.NumberOfChannels();
  const int channels_preserved = std::min(channels, max_num_channels);
  const int channels_ignored = channels - channels_preserved;

  RET_CHECK_NE(output_range.first, output_range.second);
  const float scale = (output_range.second - output_range.first) / 255.0f;
  const float bias = output_range.first;

  for (int i = 0; i < height; ++i) {
    const T* image_ptr = reinterpret_cast<const T*>(
        image_frame.PixelData() +
        (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
    for (int j = 0; j < width; ++j) {
      for (int c = 0; c < channels_preserved; ++c) {
        *tensor_ptr++ = *image_ptr++ * scale + bias;
      }
      image_ptr += channels_ignored;
    }
  }
  return absl::OkStatus();
}

}  // namespace

absl::Status NormalizeUInt8Image(const ImageFrame& image_frame,
                                 bool flip_vertically,
                                 const std::pair<float, float>& output_range,
                                 int max_num_channels, float* tensor_ptr) {
  return NormalizeImage<uint8_t>(image_frame, flip_vertically, output_range,
                                 max_num_channels, tensor_ptr);
}

absl::Status NormalizeFloatImage(const ImageFrame& image_frame,
                                 bool flip_vertically,
                                 const std::pair<float, float>& output_range,
                                 int max_num_channels, float* tensor_ptr) {
  return NormalizeImage<float>(image_frame, flip_vertically, output_range,
                               max_num_channels, tensor_ptr);
}

absl::Status CopyMatrixToTensor(const Matrix& matrix, bool is_row_major_matrix,
                                float* tensor_ptr) {
  if (is_row_major_matrix) {
    auto matrix_map =
        Eigen::Map<RowMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
    matrix_map = matrix;
  } else {
    auto matrix_map =
        Eigen::Map<ColMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
    matrix_map = matrix;
  }
  return absl::OkStatus();
}

absl::StatusOr<Tensor> ConvertImageFrameToTensorOnCpu(
    const ImageFrame& image_frame, const std::pair<float, float>& output_range,
    bool flip_vertically, int max_num_channels, MemoryManager* memory_manager) {
  const int height = image_frame.Height();
  const int width = image_frame.Width();
  const int channels = image_frame.NumberOfChannels();
  const int channels_preserved = std::min(channels, max_num_channels);
  const mediapipe::ImageFormat::Format format = image_frame.Format();

  if (!(format == mediapipe::ImageFormat::SRGBA ||
        format == mediapipe::ImageFormat::SRGB ||
        format == mediapipe::ImageFormat::GRAY8 ||
        format == mediapipe::ImageFormat::VEC32F1))
    RET_CHECK_FAIL() << "Unsupported CPU input format.";

  Tensor output_tensor(Tensor::ElementType::kFloat32,
                       Tensor::Shape{1, height, width, channels_preserved},
                       memory_manager);
  auto cpu_view = output_tensor.GetCpuWriteView();

  // Copy image data into tensor.
  if (image_frame.ByteDepth() == 1) {
    MP_RETURN_IF_ERROR(NormalizeUInt8Image(image_frame, flip_vertically,
                                           output_range, max_num_channels,
                                           cpu_view.buffer<float>()));
  } else if (image_frame.ByteDepth() == 4) {
    MP_RETURN_IF_ERROR(NormalizeFloatImage(image_frame, flip_vertically,
                                           output_range, max_num_channels,
                                           cpu_view.buffer<float>()));
  } else {
    return absl::InternalError(
        "Only byte-based (8 bit) and float (32 bit) images supported.");
  }
  return output_tensor;
}

absl::StatusOr<Tensor> ConvertMatrixToTensorOnCpu(
    const Matrix& matrix, bool row_major_matrix,
    MemoryManager* memory_manager) {
  const int height = matrix.rows();
  const int width = matrix.cols();
  const int channels = 1;
  Tensor output_tensor(Tensor::ElementType::kFloat32,
                       Tensor::Shape{1, height, width, channels},
                       memory_manager);
  MP_RETURN_IF_ERROR(
      CopyMatrixToTensor(matrix, row_major_matrix,
                         output_tensor.GetCpuWriteView().buffer<float>()));
  return output_tensor;
}

}  // namespace mediapipe