chromium/third_party/mediapipe/src/mediapipe/calculators/tensor/inference_calculator_gl_advanced.cc

// Copyright 2022 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "mediapipe/calculators/tensor/inference_calculator.h"
#include "mediapipe/calculators/tensor/inference_io_mapper.h"
#include "mediapipe/calculators/tensor/inference_on_disk_cache_helper.h"
#include "mediapipe/calculators/tensor/tensor_span.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/mediapipe_profiling.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_context.h"
#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include "mediapipe/util/tflite/tflite_model_loader.h"

namespace mediapipe {
namespace api2 {

// Runs the TFLite GPU delegate API2 directly, bypassing interpreter usage, and
// allows choosing a specific API.
//
// To trigger this code path:
//   [mediapipe.InferenceCalculatorOptions.ext] {
//     delegate {
//       gpu {
//         use_advanced_gpu_api: true
//         api: OPENCL  # or OPENGL or ANY
//       }
//     }
//   }
class InferenceCalculatorGlAdvancedImpl
    : public InferenceCalculatorNodeImpl<InferenceCalculatorGlAdvanced,
                                         InferenceCalculatorGlAdvancedImpl> {
 public:
  static absl::Status UpdateContract(CalculatorContract* cc);

  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Close(CalculatorContext* cc) override;

 private:
  // Helper class that wraps everything related to GPU inference acceleration.
  class GpuInferenceRunner {
   public:
    ~GpuInferenceRunner();

    absl::Status Init(CalculatorContext* cc,
                      std::shared_ptr<GlContext> gl_context);

    absl::StatusOr<std::vector<Tensor>> Process(
        CalculatorContext* cc, const TensorSpan& input_tensors);

    const InputOutputTensorNames& GetInputOutputTensorNames() const;

   private:
    absl::Status InitTFLiteGPURunner(
        CalculatorContext* cc,
        const mediapipe::InferenceCalculatorOptions::Delegate& delegate);

    // TfLite requires us to keep the model alive as long as the interpreter
    // is.
    Packet<TfLiteModelPtr> model_packet_;

    std::shared_ptr<GlContext> initialization_gl_context_;
    std::unique_ptr<tflite::gpu::TFLiteGPURunner> tflite_gpu_runner_;

    std::vector<Tensor::Shape> output_shapes_;

    InferenceOnDiskCacheHelper on_disk_cache_helper_;

    InputOutputTensorNames input_output_tensor_names_;
  };

  absl::StatusOr<std::vector<Tensor>> Process(
      CalculatorContext* cc, const TensorSpan& tensor_span) override;
  absl::StatusOr<std::unique_ptr<GpuInferenceRunner>> CreateInferenceRunner(
      CalculatorContext* cc);

  std::unique_ptr<GpuInferenceRunner> gpu_inference_runner_;
  mediapipe::GlCalculatorHelper gpu_helper_;
};

// Destroys the TFLite GPU runner on the GL context it was initialized on so
// that its GPU resources are released with the correct context current.
InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::~GpuInferenceRunner() {
  const auto status =
      initialization_gl_context_->Run([this]() -> absl::Status {
        tflite_gpu_runner_.reset();
        return absl::OkStatus();
      });
  if (!status.ok()) {
    ABSL_LOG(DFATAL) << "Failed to close GPU inference runner: " << status;
  }
}

absl::Status InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::Init(
    CalculatorContext* cc, std::shared_ptr<GlContext> gl_context) {
  initialization_gl_context_ = gl_context;
  const auto& options = cc->Options<mediapipe::InferenceCalculatorOptions>();

  mediapipe::InferenceCalculatorOptions::Delegate delegate = options.delegate();
  if (!kDelegate(cc).IsEmpty()) {
    const mediapipe::InferenceCalculatorOptions::Delegate&
        input_side_packet_delegate = kDelegate(cc).Get();
    RET_CHECK(
        input_side_packet_delegate.has_gpu() ||
        input_side_packet_delegate.delegate_case() ==
            mediapipe::InferenceCalculatorOptions::Delegate::DELEGATE_NOT_SET)
        << "inference_calculator_gl_advanced only supports gpu delegate "
           "configuration through side packet.";
    delegate.MergeFrom(input_side_packet_delegate);
  }

  MP_RETURN_IF_ERROR(on_disk_cache_helper_.Init(options, delegate.gpu()));

  return initialization_gl_context_->Run(
      [this, &cc, &delegate]() -> absl::Status {
        return InitTFLiteGPURunner(cc, delegate);
      });
}

absl::StatusOr<std::vector<Tensor>>
InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::Process(
    CalculatorContext* cc, const TensorSpan& input_tensors) {
  std::vector<Tensor> output_tensors;
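  // Bind each input tensor's OpenGL buffer as an SSBO input of the runner.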
  for (int i = 0; i < input_tensors.size(); ++i) {
    MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToInputTensor(
        input_tensors[i].GetOpenGlBufferReadView().name(), i));
  }
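  // Allocate float32 output tensors and bind their OpenGL buffers as SSBO
  // outputs of the runner.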
  output_tensors.reserve(output_shapes_.size());
  for (int i = 0; i < output_shapes_.size(); ++i) {
    output_tensors.emplace_back(Tensor::ElementType::kFloat32,
                                output_shapes_[i]);
    MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToOutputTensor(
        output_tensors.back().GetOpenGlBufferWriteView().name(), i));
  }
  // Run inference.
  {
    MEDIAPIPE_PROFILING(GPU_TASK_INVOKE_ADVANCED, cc);
    MP_RETURN_IF_ERROR(tflite_gpu_runner_->Invoke());
  }
  return output_tensors;
}

const InputOutputTensorNames& InferenceCalculatorGlAdvancedImpl::
    GpuInferenceRunner::GetInputOutputTensorNames() const {
  return input_output_tensor_names_;
}

absl::Status
InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::InitTFLiteGPURunner(
    CalculatorContext* cc,
    const mediapipe::InferenceCalculatorOptions::Delegate& delegate) {
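  // Load the TFLite model; model_packet_ keeps it alive for the lifetime of
  // the runner.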
  MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
  const auto& model = *model_packet_.Get();

  bool allow_precision_loss = delegate.gpu().allow_precision_loss();

  // Configure inference options and create the runner.
  tflite::gpu::InferenceOptions options;
  options.priority1 = allow_precision_loss
                          ? tflite::gpu::InferencePriority::MIN_LATENCY
                          : tflite::gpu::InferencePriority::MAX_PRECISION;
  options.priority2 = tflite::gpu::InferencePriority::AUTO;
  options.priority3 = tflite::gpu::InferencePriority::AUTO;
  switch (delegate.gpu().usage()) {
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
        FAST_SINGLE_ANSWER: {
      options.usage = tflite::gpu::InferenceUsage::FAST_SINGLE_ANSWER;
      break;
    }
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::
        SUSTAINED_SPEED: {
      options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
      break;
    }
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::UNSPECIFIED: {
      return absl::InternalError("Inference usage needs to be specified.");
    }
  }
  tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
  switch (delegate.gpu().api()) {
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::ANY: {
      // No need to force a specific API.
      break;
    }
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::OPENGL: {
      tflite_gpu_runner_->ForceOpenGL();
      break;
    }
    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::OPENCL: {
      tflite_gpu_runner_->ForceOpenCL();
      break;
    }
  }
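  // Initialize the runner with the op resolver from the side packet when one
  // is connected; otherwise fall back to the custom resolver side packet or
  // the builtin resolver without default delegates.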
  if (kSideInOpResolver(cc).IsConnected()) {
    const tflite::OpResolver& op_resolver = kSideInOpResolver(cc).Get();
    MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
        model, op_resolver, /*allow_quant_ops=*/true));
    MP_ASSIGN_OR_RETURN(input_output_tensor_names_,
                        InferenceIoMapper::GetInputOutputTensorNamesFromModel(
                            model, op_resolver));
  } else {
    tflite::ops::builtin::BuiltinOpResolver op_resolver =
        kSideInCustomOpResolver(cc).GetOr(
            tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates());
    MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
        model, op_resolver, /*allow_quant_ops=*/true));
    MP_ASSIGN_OR_RETURN(input_output_tensor_names_,
                        InferenceIoMapper::GetInputOutputTensorNamesFromModel(
                            model, op_resolver));
  }

  // Record the output tensor shapes. The output buffers themselves are
  // created and bound to the runner on each invocation in Process().
  output_shapes_.resize(tflite_gpu_runner_->outputs_size());
  for (int i = 0; i < tflite_gpu_runner_->outputs_size(); ++i) {
    output_shapes_[i] = {tflite_gpu_runner_->GetOutputShapes()[i].b,
                         tflite_gpu_runner_->GetOutputShapes()[i].h,
                         tflite_gpu_runner_->GetOutputShapes()[i].w,
                         tflite_gpu_runner_->GetOutputShapes()[i].c};
  }

  // When the on-disk cache supplies a serialized model, initialize OpenCL
  // directly from it.
  if (on_disk_cache_helper_.UseSerializedModel()) {
    tflite_gpu_runner_->ForceOpenCLInitFromSerializedModel();
  }

  MP_RETURN_IF_ERROR(on_disk_cache_helper_.ReadGpuCaches(*tflite_gpu_runner_));
  MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
  return on_disk_cache_helper_.SaveGpuCachesBasedOnBehavior(
      *tflite_gpu_runner_);
}

absl::Status InferenceCalculatorGlAdvancedImpl::UpdateContract(
    CalculatorContract* cc) {
  MP_RETURN_IF_ERROR(TensorContractCheck(cc));

  const auto& options = cc->Options<mediapipe::InferenceCalculatorOptions>();
  RET_CHECK(!options.model_path().empty() ^ kSideInModel(cc).IsConnected())
      << "Exactly one of a model side packet or a model path in the options "
         "must be provided.";

  WarnFeedbackTensorsUnsupported(cc);
  MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
  return absl::OkStatus();
}

absl::Status InferenceCalculatorGlAdvancedImpl::Open(CalculatorContext* cc) {
  MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
  gpu_inference_runner_ = std::make_unique<GpuInferenceRunner>();
  MP_RETURN_IF_ERROR(
      gpu_inference_runner_->Init(cc, gpu_helper_.GetSharedGlContext()));
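  // Provide the model's input/output tensor names to the base class for
  // tensor I/O mapping.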
  return InferenceCalculatorNodeImpl::UpdateIoMapping(
      cc, gpu_inference_runner_->GetInputOutputTensorNames());
}

absl::StatusOr<std::vector<Tensor>> InferenceCalculatorGlAdvancedImpl::Process(
    CalculatorContext* cc, const TensorSpan& tensor_span) {
  std::vector<Tensor> output_tensors;
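  // Run inference inside the calculator's GL context.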
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&]() -> absl::Status {
    MP_ASSIGN_OR_RETURN(output_tensors,
                        gpu_inference_runner_->Process(cc, tensor_span));
    return absl::OkStatus();
  }));
  return output_tensors;
}

absl::Status InferenceCalculatorGlAdvancedImpl::Close(CalculatorContext* cc) {
  gpu_inference_runner_.reset();

  return absl::OkStatus();
}

absl::StatusOr<
    std::unique_ptr<InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner>>
InferenceCalculatorGlAdvancedImpl::CreateInferenceRunner(
    CalculatorContext* cc) {
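  // Create and initialize a fresh runner on the shared GL context.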
  auto gpu_inference_runner = std::make_unique<GpuInferenceRunner>();
  MP_RETURN_IF_ERROR(
      gpu_inference_runner->Init(cc, gpu_helper_.GetSharedGlContext()));
  return gpu_inference_runner;
}

}  // namespace api2
}  // namespace mediapipe