chromium/media/gpu/windows/d3d11_av1_accelerator.cc

// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "media/gpu/windows/d3d11_av1_accelerator.h"

#include <numeric>
#include <utility>

#include "base/memory/ptr_util.h"
#include "base/memory/raw_ptr.h"
#include "base/metrics/histogram_functions.h"
#include "media/gpu/av1_picture.h"
#include "media/gpu/codec_picture.h"
#include "media/gpu/windows/d3d11_picture_buffer.h"

namespace media {

using DecodeStatus = AV1Decoder::AV1Accelerator::Status;

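// AV1Picture backed by a D3D11 decode surface. Marks the underlying
// D3D11PictureBuffer as in use for the lifetime of the picture.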
class D3D11AV1Picture : public AV1Picture {
 public:
  explicit D3D11AV1Picture(D3D11PictureBuffer* d3d11_picture,
                           D3D11VideoDecoderClient* client,
                           bool apply_grain)
      : picture_buffer_(d3d11_picture),
        client_(client),
        apply_grain_(apply_grain),
        picture_index_(d3d11_picture->picture_index()) {
    picture_buffer_->set_in_picture_use(true);
  }

  bool apply_grain() const { return apply_grain_; }
  D3D11PictureBuffer* picture_buffer() const { return picture_buffer_; }
  size_t picture_index() const { return picture_index_; }

 protected:
  ~D3D11AV1Picture() override { picture_buffer_->set_in_picture_use(false); }

 private:
  scoped_refptr<AV1Picture> CreateDuplicate() override {
    // We've already sent off the base frame for rendering, so we can just stamp
    // |picture_buffer_| with the updated timestamp.
    client_->UpdateTimestamp(picture_buffer_);
    return this;
  }

  const raw_ptr<D3D11PictureBuffer> picture_buffer_;
  const raw_ptr<D3D11VideoDecoderClient> client_;
  const bool apply_grain_;
  const size_t picture_index_;
};

D3D11AV1Accelerator::D3D11AV1Accelerator(D3D11VideoDecoderClient* client,
                                         MediaLog* media_log)
    : media_log_(media_log->Clone()), client_(client) {
  DCHECK(client_);
}

D3D11AV1Accelerator::~D3D11AV1Accelerator() = default;

scoped_refptr<AV1Picture> D3D11AV1Accelerator::CreateAV1Picture(
    bool apply_grain) {
  D3D11PictureBuffer* picture_buffer = client_->GetPicture();
  return picture_buffer ? base::MakeRefCounted<D3D11AV1Picture>(
                              picture_buffer, client_, apply_grain)
                        : nullptr;
}

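// Fills and commits the three DXVA buffers needed for an AV1 decode call:
// picture parameters, per-tile slice control entries, and the tile bitstream
// data.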
bool D3D11AV1Accelerator::SubmitDecoderBuffer(
    const DXVA_PicParams_AV1& pic_params,
    const libgav1::Vector<libgav1::TileBuffer>& tile_buffers) {
  // Buffer #1 - AV1 specific picture parameters.
  auto params_buffer =
      client_->GetWrapper()->GetPictureParametersBuffer(sizeof(pic_params));
  if (params_buffer.size() < sizeof(pic_params)) {
    MEDIA_LOG(ERROR, media_log_)
        << "Insufficient picture parameter buffer size";
    return false;
  }

  memcpy(params_buffer.data(), &pic_params, sizeof(pic_params));

  // Buffer #2 - Slice control data.
  const auto tile_size = sizeof(DXVA_Tile_AV1) * tile_buffers.size();
  auto tile_buffer = client_->GetWrapper()->GetSliceControlBuffer(tile_size);
  if (tile_buffer.size() < tile_size) {
    MEDIA_LOG(ERROR, media_log_) << "Insufficient slice control buffer size";
    return false;
  }

  auto* tiles = reinterpret_cast<DXVA_Tile_AV1*>(tile_buffer.data());

  // Buffer #3 - Tile buffer bitstream data.
  const size_t bitstream_size = std::accumulate(
      tile_buffers.begin(), tile_buffers.end(), size_t{0},
      [](size_t acc, const auto& buffer) { return acc + buffer.size; });
  auto& bitstream_buffer =
      client_->GetWrapper()->GetBitstreamBuffer(bitstream_size);
  if (bitstream_buffer.size() < bitstream_size) {
    MEDIA_LOG(ERROR, media_log_) << "Insufficient bitstream buffer size";
    return false;
  }

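  // Write one DXVA_Tile_AV1 control entry per tile and append each tile's
  // payload to the bitstream buffer, tracking the running offset.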
  size_t tile_offset = 0;
  for (size_t i = 0; i < tile_buffers.size(); ++i) {
    const auto& tile = tile_buffers[i];
    tiles[i].DataOffset = tile_offset;
    tiles[i].DataSize = tile.size;
    tiles[i].row = i / pic_params.tiles.cols;
    tiles[i].column = i % pic_params.tiles.cols;
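    // 0xFF indicates no anchor frame; large-scale tile decoding is not used.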
    tiles[i].anchor_frame = 0xFF;

    CHECK_EQ(bitstream_buffer.Write({tile.data, tile.size}), tile.size);
    tile_offset += tile.size;
  }

  // Commit the buffers prepared above. The bitstream buffer is committed by
  // SubmitSlice(), so it isn't committed explicitly here.
  return params_buffer.Commit() && tile_buffer.Commit() &&
         client_->GetWrapper()->SubmitSlice();
}

DecodeStatus D3D11AV1Accelerator::SubmitDecode(
    const AV1Picture& pic,
    const libgav1::ObuSequenceHeader& seq_header,
    const AV1ReferenceFrameVector& ref_frames,
    const libgav1::Vector<libgav1::TileBuffer>& tile_buffers,
    base::span<const uint8_t> data) {
  const D3D11AV1Picture* pic_ptr = static_cast<const D3D11AV1Picture*>(&pic);
  if (!client_->GetWrapper()->WaitForFrameBegins(pic_ptr->picture_buffer())) {
    return DecodeStatus::kFail;
  }

  DXVA_PicParams_AV1 pic_params = {};
  FillPicParams(pic_ptr->picture_index(), pic_ptr->apply_grain(),
                pic.frame_header, seq_header, ref_frames, &pic_params);

  if (!SubmitDecoderBuffer(pic_params, tile_buffers)) {
    // Errors are logged during SubmitDecoderBuffer.
    return DecodeStatus::kFail;
  }

  return client_->GetWrapper()->SubmitDecode() ? DecodeStatus::kOk
                                               : DecodeStatus::kFail;
}

bool D3D11AV1Accelerator::OutputPicture(const AV1Picture& pic) {
  const auto* pic_ptr = static_cast<const D3D11AV1Picture*>(&pic);
  return client_->OutputResult(pic_ptr, pic_ptr->picture_buffer());
}

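// Translates libgav1's parsed sequence/frame header state into the
// DXVA_PicParams_AV1 layout consumed by the driver.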
void D3D11AV1Accelerator::FillPicParams(
    size_t picture_index,
    bool apply_grain,
    const libgav1::ObuFrameHeader& frame_header,
    const libgav1::ObuSequenceHeader& seq_header,
    const AV1ReferenceFrameVector& ref_frames,
    DXVA_PicParams_AV1* pp) {
  // Note: Unclear from documentation if DXVA wants these values -1. The docs
  // say they correspond to the "minus_1" variants... Microsoft's dav1d
  // implementation uses the full values.
  pp->width = frame_header.width;
  pp->height = frame_header.height;
  pp->max_width = seq_header.max_frame_width;
  pp->max_height = seq_header.max_frame_height;

  pp->CurrPicTextureIndex = picture_index;
  pp->superres_denom = frame_header.use_superres
                           ? frame_header.superres_scale_denominator
                           : libgav1::kSuperResScaleNumerator;
  pp->bitdepth = seq_header.color_config.bitdepth;
  pp->seq_profile = seq_header.profile;

  const auto& tile_info = frame_header.tile_info;
  pp->tiles.cols = tile_info.tile_columns;
  pp->tiles.rows = tile_info.tile_rows;
  pp->tiles.context_update_id = tile_info.context_update_id;

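  // For uniform spacing, tile dimensions are the superblock dimensions
  // ceil-divided by the tile count; the last tile takes the remainder.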
  if (tile_info.uniform_spacing) {
    // TODO(b/174802667): Just use tile_column_width_in_superblocks and
    // tile_row_height_in_superblocks once they're always populated by libgav1.
    const auto tile_width_sb =
        (tile_info.sb_columns + (1 << tile_info.tile_columns_log2) - 1) >>
        tile_info.tile_columns_log2;
    const int last_width_idx = tile_info.tile_columns - 1;
    for (int i = 0; i < last_width_idx; ++i)
      pp->tiles.widths[i] = tile_width_sb;
    pp->tiles.widths[last_width_idx] =
        tile_info.sb_columns - last_width_idx * tile_width_sb;

    const auto tile_height_sb =
        (tile_info.sb_rows + (1 << tile_info.tile_rows_log2) - 1) >>
        tile_info.tile_rows_log2;
    const int last_height_idx = tile_info.tile_rows - 1;
    for (int i = 0; i < last_height_idx; ++i)
      pp->tiles.heights[i] = tile_height_sb;
    pp->tiles.heights[last_height_idx] =
        tile_info.sb_rows - last_height_idx * tile_height_sb;
  } else {
    for (int i = 0; i < pp->tiles.cols; ++i) {
      pp->tiles.widths[i] =
          frame_header.tile_info.tile_column_width_in_superblocks[i];
    }
    for (int i = 0; i < pp->tiles.rows; ++i) {
      pp->tiles.heights[i] =
          frame_header.tile_info.tile_row_height_in_superblocks[i];
    }
  }

  pp->coding.use_128x128_superblock = seq_header.use_128x128_superblock;
  pp->coding.intra_edge_filter = seq_header.enable_intra_edge_filter;
  pp->coding.interintra_compound = seq_header.enable_interintra_compound;
  pp->coding.masked_compound = seq_header.enable_masked_compound;

  // Note: The ObuSequenceHeader has a |enable_warped_motion| field and the
  // ObuFrameHeader has a |allow_warped_motion|. Per the DXVA spec,
  // "[warped_motion] corresponds to the syntax element named
  // allow_warped_motion from the specification."
  pp->coding.warped_motion = frame_header.allow_warped_motion;

  pp->coding.dual_filter = seq_header.enable_dual_filter;
  pp->coding.jnt_comp = seq_header.enable_jnt_comp;

  // Another field in both the sequence and frame header, per the DXVA spec:
  // "[screen_content_tools] corresponds to the syntax element named
  // allow_screen_content_tools from the specification."
  pp->coding.screen_content_tools = frame_header.allow_screen_content_tools;

  // Another field in both the sequence and frame header, per the DXVA spec:
  // "[integer_mv] corresponds to the syntax element named force_integer_mv
  // from the specification."
  pp->coding.integer_mv = frame_header.force_integer_mv;

  pp->coding.cdef = seq_header.enable_cdef;
  pp->coding.restoration = seq_header.enable_restoration;
  pp->coding.film_grain = seq_header.film_grain_params_present;
  pp->coding.intrabc = frame_header.allow_intrabc;
  pp->coding.high_precision_mv = frame_header.allow_high_precision_mv;
  pp->coding.switchable_motion_mode = frame_header.is_motion_mode_switchable;
  pp->coding.filter_intra = seq_header.enable_filter_intra;
  pp->coding.disable_frame_end_update_cdf =
      !frame_header.enable_frame_end_update_cdf;
  pp->coding.disable_cdf_update = !frame_header.enable_cdf_update;
  pp->coding.reference_mode = frame_header.reference_mode_select;
  pp->coding.skip_mode = frame_header.skip_mode_present;
  pp->coding.reduced_tx_set = frame_header.reduced_tx_set;
  pp->coding.superres = frame_header.use_superres;
  pp->coding.tx_mode = frame_header.tx_mode;
  pp->coding.use_ref_frame_mvs = frame_header.use_ref_frame_mvs;
  pp->coding.enable_ref_frame_mvs = seq_header.enable_ref_frame_mvs;
  pp->coding.reference_frame_update =
      !(frame_header.show_existing_frame &&
        frame_header.frame_type == libgav1::kFrameKey);

  pp->format.frame_type = frame_header.frame_type;
  pp->format.show_frame = frame_header.show_frame;
  pp->format.showable_frame = frame_header.showable_frame;
  pp->format.subsampling_x = seq_header.color_config.subsampling_x;
  pp->format.subsampling_y = seq_header.color_config.subsampling_y;
  pp->format.mono_chrome = seq_header.color_config.is_monochrome;

  pp->primary_ref_frame = frame_header.primary_reference_frame;
  pp->order_hint = frame_header.order_hint;
  pp->order_hint_bits = seq_header.order_hint_bits;

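  // Fill the seven inter reference slots (LAST through ALTREF). Intra frames
  // have no references, so every slot is marked invalid with 0xFF.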
  for (size_t i = 0; i < libgav1::kNumReferenceFrameTypes - 1; ++i) {
    if (libgav1::IsIntraFrame(frame_header.frame_type)) {
      pp->frame_refs[i].Index = 0xFF;
      continue;
    }

    const auto ref_idx = frame_header.reference_frame_index[i];
    const auto* rp =
        static_cast<const D3D11AV1Picture*>(ref_frames[ref_idx].get());
    if (!rp) {
      pp->frame_refs[i].Index = 0xFF;
      continue;
    }

    pp->frame_refs[i].width = rp->frame_header.width;
    pp->frame_refs[i].height = rp->frame_header.height;

    const auto& gm =
        frame_header.global_motion[libgav1::kReferenceFrameLast + i];
    for (size_t j = 0; j < 6; ++j)
      pp->frame_refs[i].wmmat[j] = gm.params[j];
    pp->frame_refs[i].wminvalid =
        gm.type == libgav1::kGlobalMotionTransformationTypeIdentity;

    pp->frame_refs[i].wmtype = gm.type;
    pp->frame_refs[i].Index = ref_idx;
  }

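  // Map all eight reference frame slots to decode surface indices; 0xFF marks
  // an empty slot.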
  for (size_t i = 0; i < libgav1::kNumReferenceFrameTypes; ++i) {
    const auto* rp = static_cast<const D3D11AV1Picture*>(ref_frames[i].get());
    pp->RefFrameMapTextureIndex[i] =
        rp ? rp->picture_buffer()->picture_index() : 0xFF;
  }

  pp->loop_filter.filter_level[0] = frame_header.loop_filter.level[0];
  pp->loop_filter.filter_level[1] = frame_header.loop_filter.level[1];
  pp->loop_filter.filter_level_u = frame_header.loop_filter.level[2];
  pp->loop_filter.filter_level_v = frame_header.loop_filter.level[3];
  pp->loop_filter.sharpness_level = frame_header.loop_filter.sharpness;
  pp->loop_filter.mode_ref_delta_enabled =
      frame_header.loop_filter.delta_enabled;
  pp->loop_filter.mode_ref_delta_update = frame_header.loop_filter.delta_update;
  pp->loop_filter.delta_lf_multi = frame_header.delta_lf.multi;
  pp->loop_filter.delta_lf_present = frame_header.delta_lf.present;

  for (size_t i = 0; i < libgav1::kNumReferenceFrameTypes; ++i)
    pp->loop_filter.ref_deltas[i] = frame_header.loop_filter.ref_deltas[i];
  pp->loop_filter.mode_deltas[0] = frame_header.loop_filter.mode_deltas[0];
  pp->loop_filter.mode_deltas[1] = frame_header.loop_filter.mode_deltas[1];
  pp->loop_filter.delta_lf_res = frame_header.delta_lf.scale;

  for (size_t i = 0; i < libgav1::kMaxPlanes; ++i) {
    constexpr uint8_t kD3D11LoopRestorationMapping[4] = {
        0,  // libgav1::kLoopRestorationTypeNone,
        3,  // libgav1::kLoopRestorationTypeSwitchable,
        1,  // libgav1::kLoopRestorationTypeWiener,
        2,  // libgav1::kLoopRestorationTypeSgrProj
    };

    pp->loop_filter.frame_restoration_type[i] =
        kD3D11LoopRestorationMapping[frame_header.loop_restoration.type[i]];
    pp->loop_filter.log2_restoration_unit_size[i] =
        frame_header.loop_restoration.unit_size_log2[i];
  }

  pp->quantization.delta_q_present = frame_header.delta_q.present;
  pp->quantization.delta_q_res = frame_header.delta_q.scale;
  pp->quantization.base_qindex = frame_header.quantizer.base_index;
  pp->quantization.y_dc_delta_q = frame_header.quantizer.delta_dc[0];
  pp->quantization.u_dc_delta_q = frame_header.quantizer.delta_dc[1];
  pp->quantization.v_dc_delta_q = frame_header.quantizer.delta_dc[2];
  pp->quantization.u_ac_delta_q = frame_header.quantizer.delta_ac[1];
  pp->quantization.v_ac_delta_q = frame_header.quantizer.delta_ac[2];
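  // Per the DXVA spec, the quantizer matrix levels are 0xFF when quantizer
  // matrices are not in use.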
  pp->quantization.qm_y = frame_header.quantizer.use_matrix
                              ? frame_header.quantizer.matrix_level[0]
                              : 0xFF;
  pp->quantization.qm_u = frame_header.quantizer.use_matrix
                              ? frame_header.quantizer.matrix_level[1]
                              : 0xFF;
  pp->quantization.qm_v = frame_header.quantizer.use_matrix
                              ? frame_header.quantizer.matrix_level[2]
                              : 0xFF;

  // libgav1 stores the computed versions of the cdef values, so we must undo
  // the computation for DXVA. See ObuParser::ParseCdefParameters().
  const uint8_t coeff_shift = pp->bitdepth - 8;
  pp->cdef.damping = frame_header.cdef.damping - coeff_shift - 3u;
  pp->cdef.bits = frame_header.cdef.bits;
  for (size_t i = 0; i < libgav1::kMaxCdefStrengths; ++i) {
    // libgav1 stores the computed secondary strengths, which may be |4| even
    // though the syntax element is a two-bit value in the range 0-3; map 4
    // back to 3 for DXVA.
    // See https://aomediacodec.github.io/av1-spec/#cdef-params-syntax
    uint8_t y_str = frame_header.cdef.y_secondary_strength[i] >> coeff_shift;
    uint8_t uv_str = frame_header.cdef.uv_secondary_strength[i] >> coeff_shift;
    y_str = y_str == 4 ? 3 : y_str;
    uv_str = uv_str == 4 ? 3 : uv_str;
    pp->cdef.y_strengths[i].primary =
        frame_header.cdef.y_primary_strength[i] >> coeff_shift;
    pp->cdef.y_strengths[i].secondary = y_str;
    pp->cdef.uv_strengths[i].primary =
        frame_header.cdef.uv_primary_strength[i] >> coeff_shift;
    pp->cdef.uv_strengths[i].secondary = uv_str;
  }

  pp->interp_filter = frame_header.interpolation_filter;

  pp->segmentation.enabled = frame_header.segmentation.enabled;
  pp->segmentation.update_map = frame_header.segmentation.update_map;
  pp->segmentation.update_data = frame_header.segmentation.update_data;
  pp->segmentation.temporal_update = frame_header.segmentation.temporal_update;
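  // Pack the per-segment feature enables into a bitmask alongside the raw
  // feature data.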
  for (size_t i = 0; i < libgav1::kMaxSegments; ++i) {
    for (size_t j = 0; j < libgav1::kSegmentFeatureMax; ++j) {
      pp->segmentation.feature_mask[i].mask |=
          frame_header.segmentation.feature_enabled[i][j] << j;
      pp->segmentation.feature_data[i][j] =
          frame_header.segmentation.feature_data[i][j];
    }
  }

  if (apply_grain) {
    const auto& fg = frame_header.film_grain_params;
    pp->film_grain.apply_grain = fg.apply_grain;
    pp->film_grain.scaling_shift_minus8 = fg.chroma_scaling - 8;
    pp->film_grain.chroma_scaling_from_luma = fg.chroma_scaling_from_luma;
    pp->film_grain.ar_coeff_lag = fg.auto_regression_coeff_lag;
    pp->film_grain.ar_coeff_shift_minus6 = fg.auto_regression_shift - 6;
    pp->film_grain.grain_scale_shift = fg.grain_scale_shift;
    pp->film_grain.overlap_flag = fg.overlap_flag;
    pp->film_grain.clip_to_restricted_range = fg.clip_to_restricted_range;
    pp->film_grain.matrix_coeff_is_identity =
        seq_header.color_config.matrix_coefficients ==
        libgav1::kMatrixCoefficientsIdentity;
    pp->film_grain.grain_seed = fg.grain_seed;
    pp->film_grain.num_y_points = fg.num_y_points;
    for (uint8_t i = 0; i < fg.num_y_points; ++i) {
      pp->film_grain.scaling_points_y[i][0] = fg.point_y_value[i];
      pp->film_grain.scaling_points_y[i][1] = fg.point_y_scaling[i];
    }
    pp->film_grain.num_cb_points = fg.num_u_points;
    for (uint8_t i = 0; i < fg.num_u_points; ++i) {
      pp->film_grain.scaling_points_cb[i][0] = fg.point_u_value[i];
      pp->film_grain.scaling_points_cb[i][1] = fg.point_u_scaling[i];
    }
    pp->film_grain.num_cr_points = fg.num_v_points;
    for (uint8_t i = 0; i < fg.num_v_points; ++i) {
      pp->film_grain.scaling_points_cr[i][0] = fg.point_v_value[i];
      pp->film_grain.scaling_points_cr[i][1] = fg.point_v_scaling[i];
    }
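    // libgav1 subtracts 128 from the ar_coeffs_*_plus_128 syntax elements
    // during parsing; DXVA wants the original unsigned values, so add it back.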
    for (size_t i = 0; i < std::size(fg.auto_regression_coeff_y); ++i) {
      pp->film_grain.ar_coeffs_y[i] = fg.auto_regression_coeff_y[i] + 128;
    }
    for (size_t i = 0; i < std::size(fg.auto_regression_coeff_u); ++i) {
      pp->film_grain.ar_coeffs_cb[i] = fg.auto_regression_coeff_u[i] + 128;
      pp->film_grain.ar_coeffs_cr[i] = fg.auto_regression_coeff_v[i] + 128;
    }
    // libgav1 will provide the multipliers by subtracting 128 and the offsets
    // by subtracting 256. Restore values as DXVA spec requires values without
    // subtraction.
    if (fg.num_u_points > 0) {
      pp->film_grain.cb_mult = fg.u_multiplier + 128;
      pp->film_grain.cb_luma_mult = fg.u_luma_multiplier + 128;
      pp->film_grain.cb_offset = fg.u_offset + 256;
    }
    if (fg.num_v_points > 0) {
      pp->film_grain.cr_mult = fg.v_multiplier + 128;
      pp->film_grain.cr_luma_mult = fg.v_luma_multiplier + 128;
      pp->film_grain.cr_offset = fg.v_offset + 256;
    }
  }

  // StatusReportFeedbackNumber "should not be equal to 0"... but it crashes :|
  // pp->StatusReportFeedbackNumber = ++status_feedback_;
}

}  // namespace media