chromium/third_party/mediapipe/src/mediapipe/calculators/image/bilateral_filter_calculator.cc

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <locale>
#include <memory>
#include <sstream>
#include <string>

#include "absl/log/absl_check.h"
#include "absl/strings/str_replace.h"
#include "mediapipe/calculators/image/bilateral_filter_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/vector.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

namespace {
// Stream tags of the CPU path. GetContract() declares streams carrying these
// tags as ImageFrame packets.
constexpr char kInputFrameTag[] = "IMAGE";
constexpr char kInputGuideTag[] = "GUIDE";
constexpr char kOutputFrameTag[] = "IMAGE";

// Stream tags of the GPU path. GetContract() declares streams carrying these
// tags as GpuBuffer packets.
constexpr char kInputFrameTagGpu[] = "IMAGE_GPU";
constexpr char kInputGuideTagGpu[] = "GUIDE_GPU";
constexpr char kOutputFrameTagGpu[] = "IMAGE_GPU";

// Vertex attribute locations used when building the shader program and the
// vertex array in GlSetup(). NUM_ATTRIBUTES is the number of attributes.
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
}  // namespace

// A calculator for applying a bilateral filter to an image,
// with an optional guide image (joint bilateral).
//
// Inputs:
//   One of the following two IMAGE tags:
//   IMAGE: ImageFrame containing input image - Grayscale or RGB only.
//   IMAGE_GPU: GpuBuffer containing input image - Grayscale, RGB or RGBA.
//
//   GUIDE (optional): ImageFrame guide image used to filter IMAGE.
//                     (Not supported yet: CPU joint filtering is unimplemented.)
//   GUIDE_GPU (optional): GpuBuffer guide image used to filter IMAGE_GPU.
//
// Output:
//   One of the following two tags:
//   IMAGE:     A filtered ImageFrame - Same as input.
//   IMAGE_GPU:  A filtered GpuBuffer - RGBA
//
// Options:
//   sigma_space: Pixel radius: use (sigma_space*2+1)x(sigma_space*2+1) window.
//                This should be set based on output image pixel space.
//   sigma_color: Color variance: normalized [0-1] color difference allowed.
//
// Notes:
//   * When GUIDE is present, the output image is same size as GUIDE image;
//     otherwise, the output image is same size as input image.
//   * On GPU the kernel window is subsampled by approximately sqrt(sigma_space)
//     i.e. the step size is ~sqrt(sigma_space),
//     prioritizing performance > quality.
//   * TODO: Add CPU path for joint filter.
//
class BilateralFilterCalculator : public CalculatorBase {
 public:
  BilateralFilterCalculator() = default;
  ~BilateralFilterCalculator() override = default;

  // Declares the packet types of the input/output streams and, when any GPU
  // stream is present, the GL requirements of the calculator.
  static absl::Status GetContract(CalculatorContract* cc);

  // From Calculator.
  // Open() reads the options and selects the CPU or GPU path.
  absl::Status Open(CalculatorContext* cc) override;
  // Process() filters the current input packet on the selected path.
  absl::Status Process(CalculatorContext* cc) override;
  // Close() deletes the GL objects created by GlSetup().
  absl::Status Close(CalculatorContext* cc) override;

 private:
  // Filters the IMAGE_GPU packet (optionally guided by GUIDE_GPU) with the
  // shader program. Must run inside the GL context.
  absl::Status RenderGpu(CalculatorContext* cc);
  // Filters the IMAGE packet with cv::bilateralFilter().
  absl::Status RenderCpu(CalculatorContext* cc);

  // Builds the shader program and the vertex array/buffers.
  absl::Status GlSetup(CalculatorContext* cc);
  // Draws the full-frame quad with the currently bound program and textures.
  void GlRender(CalculatorContext* cc);

  mediapipe::BilateralFilterCalculatorOptions options_;
  // Filter parameters copied from options_ in Open(); -1 means "not set yet".
  float sigma_color_ = -1.f;
  float sigma_space_ = -1.f;

  // True when both the input and output image streams are GPU streams.
  bool use_gpu_ = false;
  // True once GlSetup() has completed successfully.
  bool gpu_initialized_ = false;
#if !MEDIAPIPE_DISABLE_GPU
  mediapipe::GlCalculatorHelper gpu_helper_;
  // GL object names. All are zero-initialized so that Close(), which tests
  // them against zero before deleting, never reads an indeterminate value
  // when GlSetup() has not run.
  GLuint program_ = 0;
  GLuint vao_ = 0;
  GLuint vbo_[2] = {0, 0};  // vertex storage
#endif                      // !MEDIAPIPE_DISABLE_GPU
};
REGISTER_CALCULATOR(BilateralFilterCalculator);

// Validates the stream configuration and declares the packet type of every
// supported tag that is present. CPU tags carry ImageFrame, GPU tags carry
// GpuBuffer. If any GPU stream is declared, the GL helper contract is added.
absl::Status BilateralFilterCalculator::GetContract(CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();

  RET_CHECK_GE(inputs.NumEntries(), 1);

  const bool cpu_image_in = inputs.HasTag(kInputFrameTag);
  const bool gpu_image_in = inputs.HasTag(kInputFrameTagGpu);
  const bool gpu_image_out = outputs.HasTag(kOutputFrameTagGpu);

  // Exactly one flavour of input image is allowed.
  if (cpu_image_in && gpu_image_in) {
    return absl::InternalError("Cannot have multiple input images.");
  }
  // A GPU input requires a GPU output and vice versa.
  if (gpu_image_in != gpu_image_out) {
    return absl::InternalError("GPU output must have GPU input.");
  }

  bool needs_gl_context = false;

  // Image to be filtered.
#if !MEDIAPIPE_DISABLE_GPU
  if (gpu_image_in) {
    inputs.Tag(kInputFrameTagGpu).Set<mediapipe::GpuBuffer>();
    needs_gl_context = true;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU
  if (cpu_image_in) {
    inputs.Tag(kInputFrameTag).Set<ImageFrame>();
  }

  // Optional guide image.
#if !MEDIAPIPE_DISABLE_GPU
  if (inputs.HasTag(kInputGuideTagGpu)) {
    inputs.Tag(kInputGuideTagGpu).Set<mediapipe::GpuBuffer>();
    needs_gl_context = true;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU
  if (inputs.HasTag(kInputGuideTag)) {
    inputs.Tag(kInputGuideTag).Set<ImageFrame>();
  }

  // Filtered result.
#if !MEDIAPIPE_DISABLE_GPU
  if (gpu_image_out) {
    outputs.Tag(kOutputFrameTagGpu).Set<mediapipe::GpuBuffer>();
    needs_gl_context = true;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU
  if (outputs.HasTag(kOutputFrameTag)) {
    outputs.Tag(kOutputFrameTag).Set<ImageFrame>();
  }

  if (needs_gl_context) {
#if !MEDIAPIPE_DISABLE_GPU
    MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
  }

  return absl::OkStatus();
}

// Reads the calculator options, selects the CPU or GPU path and, for the GPU
// path, opens the GL helper.
//
// Returns an error status (instead of aborting the process) when
// sigma_color or sigma_space is negative, or when GPU streams are configured
// in a build with GPU support disabled.
absl::Status BilateralFilterCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  options_ = cc->Options<mediapipe::BilateralFilterCalculatorOptions>();

  if (cc->Inputs().HasTag(kInputFrameTagGpu) &&
      cc->Outputs().HasTag(kOutputFrameTagGpu)) {
#if !MEDIAPIPE_DISABLE_GPU
    use_gpu_ = true;
#else
    RET_CHECK_FAIL() << "GPU processing not enabled.";
#endif
  }

  sigma_color_ = options_.sigma_color();
  sigma_space_ = options_.sigma_space();
  // Invalid options are a graph configuration error, so report them through
  // the returned status rather than crashing.
  RET_CHECK_GE(sigma_color_, 0.0f) << "sigma_color must be non-negative.";
  RET_CHECK_GE(sigma_space_, 0.0f) << "sigma_space must be non-negative.";
  // The option is normalized to [0-1]; the CPU path scales it by 255 before
  // handing it to OpenCV, while the GPU shader uses the normalized value.
  if (!use_gpu_) sigma_color_ *= 255.0;

  if (use_gpu_) {
#if !MEDIAPIPE_DISABLE_GPU
    MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
  }

  return absl::OkStatus();
}

// Filters the current input packet. The CPU path calls RenderCpu() directly.
// The GPU path runs inside the GL context, building the GL objects on first
// use via GlSetup() and then calling RenderGpu().
absl::Status BilateralFilterCalculator::Process(CalculatorContext* cc) {
  if (!use_gpu_) {
    MP_RETURN_IF_ERROR(RenderCpu(cc));
    return absl::OkStatus();
  }

#if !MEDIAPIPE_DISABLE_GPU
  const auto render_in_gl_context = [this, cc]() -> absl::Status {
    // Lazily create the program and vertex data the first time through.
    if (!gpu_initialized_) {
      MP_RETURN_IF_ERROR(GlSetup(cc));
      gpu_initialized_ = true;
    }
    MP_RETURN_IF_ERROR(RenderGpu(cc));
    return absl::OkStatus();
  };
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(render_in_gl_context));
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// Releases the GL objects created by GlSetup(). Always returns OK.
absl::Status BilateralFilterCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // GL objects exist only after GlSetup() has completed, which Process()
  // records in gpu_initialized_. Skipping the teardown otherwise avoids
  // using a GL helper that was never opened (CPU-only graphs) and avoids
  // inspecting GL object names that were never generated.
  if (gpu_initialized_) {
    gpu_helper_.RunInGlContext([this] {
      if (program_) glDeleteProgram(program_);
      if (vao_) glDeleteVertexArrays(1, &vao_);
      if (vbo_[0]) glDeleteBuffers(2, vbo_);
      program_ = 0;
      vao_ = 0;
      vbo_[0] = 0;
      vbo_[1] = 0;
    });
    gpu_initialized_ = false;
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// CPU path: applies cv::bilateralFilter() to the IMAGE packet and emits the
// result on the IMAGE output at the input timestamp.
//
// Emits nothing when the IMAGE packet is empty. Returns InternalError for
// channel counts other than 1 or 3, and UnimplementedError when a non-empty
// GUIDE packet is supplied.
absl::Status BilateralFilterCalculator::RenderCpu(CalculatorContext* cc) {
  auto& inputs = cc->Inputs();
  if (inputs.Tag(kInputFrameTag).IsEmpty()) {
    return absl::OkStatus();
  }

  const auto& src_frame = inputs.Tag(kInputFrameTag).Get<ImageFrame>();
  auto src_mat = mediapipe::formats::MatView(&src_frame);

  // OpenCV's bilateral filter handles 1- and 3-channel images only.
  const int channel_count = src_mat.channels();
  if (channel_count != 1 && channel_count != 3) {
    return absl::InternalError(
        "CPU filtering supports only 1 or 3 channel input images.");
  }

  // Destination has the same format and dimensions as the source.
  auto dst_frame = std::make_unique<ImageFrame>(src_frame.Format(),
                                                src_mat.cols, src_mat.rows);

  const bool guide_supplied = inputs.HasTag(kInputGuideTag) &&
                              !inputs.Tag(kInputGuideTag).IsEmpty();
  if (guide_supplied) {
    // cv::jointBilateralFilter() is in contrib module 'ximgproc'.
    return absl::UnimplementedError(
        "CPU joint filtering support is not implemented yet.");
  }

  auto dst_mat = mediapipe::formats::MatView(dst_frame.get());
  // Use a diameter of 'sigma_space * 2' to match the GPU definition of radius.
  cv::bilateralFilter(src_mat, dst_mat, /*d=*/sigma_space_ * 2.0,
                      sigma_color_, sigma_space_);

  auto& out_stream = cc->Outputs().Tag(kOutputFrameTag);
  out_stream.Add(dst_frame.release(), cc->InputTimestamp());
  return absl::OkStatus();
}

// GPU path: runs the shader built by GlSetup() over the IMAGE_GPU packet and
// emits the result on IMAGE_GPU at the input timestamp.
//
// When a GUIDE_GPU stream is configured the joint filter is used and the
// output has the guide image's size; otherwise the output has the input's
// size. Nothing is emitted when the image packet (or, in joint mode, the
// guide packet) is empty.
absl::Status BilateralFilterCalculator::RenderGpu(CalculatorContext* cc) {
  auto& inputs = cc->Inputs();
  if (inputs.Tag(kInputFrameTagGpu).IsEmpty()) {
    return absl::OkStatus();
  }
#if !MEDIAPIPE_DISABLE_GPU
  const auto& src_buffer =
      inputs.Tag(kInputFrameTagGpu).Get<mediapipe::GpuBuffer>();
  auto src_texture = gpu_helper_.CreateSourceTexture(src_buffer);

  mediapipe::GlTexture dst_texture;
  const bool joint_mode = inputs.HasTag(kInputGuideTagGpu);

  if (!joint_mode) {
    // Plain bilateral filter: the output matches the input size.
    const int out_width = src_buffer.width();
    const int out_height = src_buffer.height();

    glUseProgram(program_);
    glUniform2f(glGetUniformLocation(program_, "texel_size"), 1.0 / out_width,
                1.0 / out_height);
    dst_texture = gpu_helper_.CreateDestinationTexture(
        out_width, out_height, mediapipe::GpuBufferFormat::kBGRA32);
    gpu_helper_.BindFramebuffer(dst_texture);

    // Texture unit 1 holds the image being filtered.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, src_texture.name());
    GlRender(cc);
    glBindTexture(GL_TEXTURE_2D, 0);
  } else {
    // Joint bilateral filter: the output matches the guide size.
    if (inputs.Tag(kInputGuideTagGpu).IsEmpty()) return absl::OkStatus();

    glUseProgram(program_);
    const auto& guide_buffer =
        inputs.Tag(kInputGuideTagGpu).Get<mediapipe::GpuBuffer>();
    auto guide_texture = gpu_helper_.CreateSourceTexture(guide_buffer);
    const int out_width = guide_buffer.width();
    const int out_height = guide_buffer.height();

    glUniform2f(glGetUniformLocation(program_, "texel_size_guide"),
                1.0 / out_width, 1.0 / out_height);
    dst_texture = gpu_helper_.CreateDestinationTexture(
        out_width, out_height, mediapipe::GpuBufferFormat::kBGRA32);
    gpu_helper_.BindFramebuffer(dst_texture);

    // Texture unit 1 holds the image being filtered, unit 2 the guide.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, src_texture.name());
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, guide_texture.name());
    GlRender(cc);
    // Unbind unit 2 (still active), then unit 1.
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    guide_texture.Release();
  }
  glFlush();

  // Publish the rendered texture as a GPU packet.
  auto result_buffer = dst_texture.GetFrame<mediapipe::GpuBuffer>();
  auto& out_stream = cc->Outputs().Tag(kOutputFrameTagGpu);
  out_stream.Add(result_buffer.release(), cc->InputTimestamp());

  // Release the textures used for this frame.
  src_texture.Release();
  dst_texture.Release();
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

// Draws the quad stored in vao_ as a 4-vertex triangle strip. The caller is
// expected to have selected the program, framebuffer and textures already.
void BilateralFilterCalculator::GlRender(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  constexpr GLsizei kQuadVertexCount = 4;

  // Restore the vertex state recorded in GlSetup().
  glBindVertexArray(vao_);

  // Render the quad.
  glDrawArrays(GL_TRIANGLE_STRIP, /*first=*/0, kQuadVertexCount);

  // Leave no vertex array bound.
  glBindVertexArray(0);
#endif  // !MEDIAPIPE_DISABLE_GPU
}

// Builds the GL objects used by RenderGpu()/GlRender():
//   * the shader program (joint variant when a GUIDE_GPU stream is
//     configured, plain variant otherwise), with sigma_space_/sigma_color_
//     baked into the shader source as constants;
//   * a vertex array with two static buffers holding the quad positions and
//     texture coordinates.
//
// Returns an error status when the shader program cannot be created.
absl::Status BilateralFilterCalculator::GlSetup(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  const GLint attr_location[NUM_ATTRIBUTES] = {
      ATTRIB_VERTEX,
      ATTRIB_TEXTURE_POSITION,
  };
  const GLchar* attr_name[NUM_ATTRIBUTES] = {
      "position",
      "texture_coordinate",
  };

  // Formats a float as a GLSL literal in fixed notation with six decimals.
  // std::to_string() is avoided because it formats according to the current
  // C locale, which may use ',' as the decimal separator and would then
  // produce shader source that does not compile. The classic locale always
  // uses '.', and the output equals std::to_string() under the "C" locale.
  const auto to_glsl_float = [](float value) {
    std::ostringstream stream;
    stream.imbue(std::locale::classic());
    stream << std::fixed << value;
    return stream.str();
  };

  // Common functions and settings for both shaders.
  const std::string common_string =
      absl::StrReplaceAll(R"(
    const float sigma_space = $space;
    const float sigma_color = $color;

    const float kSparsityFactor = 0.66;  // Higher is more sparse.
    const float sparsity = max(1.0, sqrt(sigma_space) * kSparsityFactor);
    const float step = sparsity;
    const float radius = sigma_space;
    const float offset = (step > 1.0) ? (step * 0.5) : (0.0);

    float gaussian(float x, float sigma) {
      float coeff = -0.5 / (sigma * sigma * 4.0 + 1.0e-6);
      return exp((x * x) * coeff);
    }
  )",
                          {{"$space", to_glsl_float(sigma_space_)},
                           {"$color", to_glsl_float(sigma_color_)}});

  // Shader to do bilateral filtering on input image based on sigma space/color.
  // Large kernel sizes are subsampled based on sqrt(sigma_space) window size,
  // denoted as 'sparsity' below.
  const std::string frag_src =
      std::string(mediapipe::kMediaPipeFragmentShaderPreamble) + R"(
    DEFAULT_PRECISION(highp, float)

    in vec2 sample_coordinate;
    uniform sampler2D input_frame;
    uniform vec2 texel_size;

    )" +
      common_string + R"(

    void main() {
      vec2 center_uv = sample_coordinate;
      vec3 center_val = texture2D(input_frame, center_uv).rgb;
      vec3 new_val = vec3(0.0);

      float space_weight = 0.0;
      float color_weight = 0.0;
      float total_weight = 0.0;

      float sigma_texel = max(texel_size.x, texel_size.y) * sigma_space;
      // Subsample kernel space.
      for (float i = -radius+offset; i <= radius; i+=step) {
        for (float j = -radius+offset; j <= radius; j+=step) {
          vec2 shift = vec2(j, i) * texel_size;
          vec2 uv = vec2(center_uv + shift);
          vec3 val = texture2D(input_frame, uv).rgb;

          space_weight = gaussian(distance(center_uv, uv), sigma_texel);
          color_weight = gaussian(distance(center_val, val), sigma_color);
          total_weight += space_weight * color_weight;

          new_val += vec3(space_weight * color_weight) * val;
        }
      }
      new_val /= vec3(total_weight);

      gl_FragColor = vec4(new_val, 1.0);
    }
  )";

  // Shader to do joint bilateral filtering on input image based on
  // sigma space/color, and a Guide image.
  // Large kernel sizes are subsampled based on sqrt(sigma_space) window size,
  // denoted as 'sparsity' below.
  const std::string joint_frag_src =
      std::string(mediapipe::kMediaPipeFragmentShaderPreamble) + R"(
    DEFAULT_PRECISION(highp, float)

    in vec2 sample_coordinate;
    uniform sampler2D input_frame;
    uniform sampler2D guide_frame;
    uniform vec2 texel_size_guide; // size of guide and resulting filtered image

    )" +
      common_string + R"(

    void main() {
      vec2 center_uv = sample_coordinate;
      vec3 center_val = texture2D(guide_frame, center_uv).rgb;
      vec3 new_val = vec3(0.0);

      float space_weight = 0.0;
      float color_weight = 0.0;
      float total_weight = 0.0;

      float sigma_texel = max(texel_size_guide.x, texel_size_guide.y) * sigma_space;
      // Subsample kernel space.
      for (float i = -radius+offset; i <= radius; i+=step) {
        for (float j = -radius+offset; j <= radius; j+=step) {
          vec2 shift = vec2(j, i) * texel_size_guide;
          vec2 uv = vec2(center_uv + shift);
          vec3 guide_val = texture2D(guide_frame, uv).rgb;
          vec3 out_val = texture2D(input_frame, uv).rgb;

          space_weight = gaussian(distance(center_uv, uv), sigma_texel);
          color_weight = gaussian(distance(center_val, guide_val), sigma_color);
          total_weight += space_weight * color_weight;

          new_val += vec3(space_weight * color_weight) * out_val;
        }
      }
      new_val /= vec3(total_weight);

      gl_FragColor = vec4(new_val, 1.0);
    }
  )";

  // Only initialize the one shader to be used.
  const bool has_guide_image = cc->Inputs().HasTag(kInputGuideTagGpu);
  const std::string& fragment_source =
      has_guide_image ? joint_frag_src : frag_src;

  // Create the shader program and point its samplers at the texture units
  // that RenderGpu() binds (unit 1: input image, unit 2: guide image).
  mediapipe::GlhCreateProgram(mediapipe::kBasicVertexShader,
                              fragment_source.c_str(), NUM_ATTRIBUTES,
                              &attr_name[0], attr_location, &program_);
  RET_CHECK(program_) << "Problem initializing the program.";
  glUseProgram(program_);
  glUniform1i(glGetUniformLocation(program_, "input_frame"), 1);
  if (has_guide_image) {
    glUniform1i(glGetUniformLocation(program_, "guide_frame"), 2);
  }

  // Generate vbos and vao.
  glGenVertexArrays(1, &vao_);
  glGenBuffers(2, vbo_);

  // Fill in static vbo (vbo 0) with the quad positions, to be reused in
  // GlRender().
  glBindVertexArray(vao_);
  glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
  glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat),
               mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);
  glEnableVertexAttribArray(ATTRIB_VERTEX);
  glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, nullptr);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  // Fill in static vbo (vbo 1) with the texture coordinates, to be reused in
  // GlRender().
  glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
  glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat),
               mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);
  glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
  glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, nullptr);
  glBindBuffer(GL_ARRAY_BUFFER, 0);
  glBindVertexArray(0);

#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

}  // namespace mediapipe