chromium/media/gpu/v4l2/test/h264_decoder.cc

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/gpu/v4l2/test/h264_decoder.h"

#include <linux/v4l2-controls.h>
#include <linux/videodev2.h>

#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/notreached.h"
#include "media/gpu/macros.h"
#include "ui/gfx/geometry/rect.h"

namespace media {

namespace v4l2_test {

namespace {
// FourCC of the compressed format this decoder feeds to the driver. It is
// passed to CreateOUTPUTQueue() and to the V4L2IoctlShim constructor below.
constexpr uint32_t kDriverCodecFourcc = V4L2_PIX_FMT_H264_SLICE;

// Zig-zag index tables used by SetupScalingMatrix(): scaling-list entry |j|
// is stored at position |zigzag[j]| of the corresponding V4L2 list.

// 4x4 table, laid out four entries per line.
constexpr uint8_t zigzag_4x4[] = {
    0,  1,  4,  8,   //
    5,  2,  3,  6,   //
    9,  12, 13, 10,  //
    7,  11, 14, 15,  //
};

// 8x8 table, laid out eight entries per line.
constexpr uint8_t zigzag_8x8[] = {
    0,  1,  8,  16, 9,  2,  3,  10,  //
    17, 24, 32, 25, 18, 11, 4,  5,   //
    12, 19, 26, 33, 40, 48, 41, 34,  //
    27, 20, 13, 6,  7,  14, 21, 28,  //
    35, 42, 49, 56, 57, 50, 43, 36,  //
    29, 22, 15, 23, 30, 37, 44, 51,  //
    58, 59, 52, 45, 38, 31, 39, 46,  //
    53, 60, 61, 54, 47, 55, 62, 63,  //
};

// TODO(b/234752983): Set number of buffers in CAPTURE queue dynamically.
// |18| is the minimum number of buffers in the CAPTURE queue required to
// successfully decode all ITU-T baseline and main bitstreams. The value is
// handed to CreateCAPTUREQueue() the first time a picture is finished.
constexpr uint32_t kNumberOfBuffersInCaptureQueue = 18;

// Comparator struct used for H.264 picture reordering
struct H264PicOrderCompare {
  bool operator()(const H264SliceMetadata* a,
                  const H264SliceMetadata* b) const {
    return a->pic_order_cnt < b->pic_order_cnt;
  }
};

// Extracts the sample bit depth described by |sps| into |bit_depth|.
//
// Returns true on success. Returns false, leaving |bit_depth| untouched, when
// luma and chroma use different bit depths or when the depth is not one of
// 8, 10, 12 or 14.
bool ParseBitDepth(const H264SPS& sps, uint8_t& bit_depth) {
  // Spec 7.4.2.1.1
  if (sps.bit_depth_luma_minus8 != sps.bit_depth_chroma_minus8) {
    // Note the leading space in the second literal: adjacent stream inserts
    // are concatenated verbatim.
    VLOGF(4) << "H264Decoder doesn't support different bit depths between luma"
             << " and chroma, bit_depth_luma_minus8="
             << sps.bit_depth_luma_minus8
             << ", bit_depth_chroma_minus8=" << sps.bit_depth_chroma_minus8;
    return false;
  }
  DCHECK_GE(sps.bit_depth_luma_minus8, 0);
  DCHECK_LE(sps.bit_depth_luma_minus8, 6);
  switch (sps.bit_depth_luma_minus8) {
    case 0:
      bit_depth = 8u;
      break;
    case 2:
      bit_depth = 10u;
      break;
    case 4:
      bit_depth = 12u;
      break;
    case 6:
      bit_depth = 14u;
      break;
    default:
      // Odd depths (9, 11, 13) and anything out of range are rejected.
      VLOGF(4) << "Invalid bit depth: "
               << base::checked_cast<int>(sps.bit_depth_luma_minus8 + 8);
      return false;
  }
  return true;
}

// Builds the V4L2 stateless H.264 SPS control from the parsed |sps|.
// |sps| must be non-null; every field is copied one-to-one and the boolean
// SPS members are folded into the V4L2 bit masks.
v4l2_ctrl_h264_sps SetupSPSCtrl(const H264SPS* sps) {
  // Zero-initialized, so bit masks below can simply be OR-ed together.
  struct v4l2_ctrl_h264_sps ctrl = {};

  // Profile, level and identification.
  ctrl.profile_idc = sps->profile_idc;
  ctrl.level_idc = sps->level_idc;
  ctrl.seq_parameter_set_id = sps->seq_parameter_set_id;

  // constraint_setN_flag -> V4L2 constraint bit mask.
  if (sps->constraint_set0_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET0_FLAG;
  }
  if (sps->constraint_set1_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET1_FLAG;
  }
  if (sps->constraint_set2_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET2_FLAG;
  }
  if (sps->constraint_set3_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET3_FLAG;
  }
  if (sps->constraint_set4_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET4_FLAG;
  }
  if (sps->constraint_set5_flag) {
    ctrl.constraint_set_flags |= V4L2_H264_SPS_CONSTRAINT_SET5_FLAG;
  }

  // Sample format.
  ctrl.chroma_format_idc = sps->chroma_format_idc;
  ctrl.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
  ctrl.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;

  // Frame numbering and picture order count parameters.
  ctrl.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
  ctrl.pic_order_cnt_type = sps->pic_order_cnt_type;
  ctrl.log2_max_pic_order_cnt_lsb_minus4 =
      sps->log2_max_pic_order_cnt_lsb_minus4;
  ctrl.max_num_ref_frames = sps->max_num_ref_frames;
  ctrl.num_ref_frames_in_pic_order_cnt_cycle =
      sps->num_ref_frames_in_pic_order_cnt_cycle;
  ctrl.offset_for_non_ref_pic = sps->offset_for_non_ref_pic;
  ctrl.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field;

  // Both offset_for_ref_frame arrays must hold the same number of entries
  // for the element-wise copy below to be valid.
  static_assert(std::extent<decltype(ctrl.offset_for_ref_frame)>() ==
                    std::extent<decltype(sps->offset_for_ref_frame)>(),
                "SPS Offsets for ref frames size must match");
  for (size_t idx = 0; idx < std::size(ctrl.offset_for_ref_frame); ++idx) {
    ctrl.offset_for_ref_frame[idx] = sps->offset_for_ref_frame[idx];
  }

  // Picture dimensions.
  ctrl.pic_width_in_mbs_minus1 = sps->pic_width_in_mbs_minus1;
  ctrl.pic_height_in_map_units_minus1 = sps->pic_height_in_map_units_minus1;

  // Remaining boolean SPS members -> V4L2 flag mask.
  ctrl.flags =
      (sps->separate_colour_plane_flag
           ? V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE
           : 0) |
      (sps->qpprime_y_zero_transform_bypass_flag
           ? V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS
           : 0) |
      (sps->delta_pic_order_always_zero_flag
           ? V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO
           : 0) |
      (sps->gaps_in_frame_num_value_allowed_flag
           ? V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED
           : 0) |
      (sps->frame_mbs_only_flag ? V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY : 0) |
      (sps->mb_adaptive_frame_field_flag
           ? V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD
           : 0) |
      (sps->direct_8x8_inference_flag
           ? V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE
           : 0);

  return ctrl;
}

// Builds the V4L2 stateless H.264 PPS control from the parsed |pps|.
// |pps| must be non-null; scalar fields are copied one-to-one and the boolean
// PPS members are folded into the V4L2 flag mask.
v4l2_ctrl_h264_pps SetupPPSCtrl(const H264PPS* pps) {
  struct v4l2_ctrl_h264_pps ctrl = {};

  // Identification.
  ctrl.pic_parameter_set_id = pps->pic_parameter_set_id;
  ctrl.seq_parameter_set_id = pps->seq_parameter_set_id;

  // Slice group and reference index defaults.
  ctrl.num_slice_groups_minus1 = pps->num_slice_groups_minus1;
  ctrl.num_ref_idx_l0_default_active_minus1 =
      pps->num_ref_idx_l0_default_active_minus1;
  ctrl.num_ref_idx_l1_default_active_minus1 =
      pps->num_ref_idx_l1_default_active_minus1;

  // Prediction weighting and quantization parameters.
  ctrl.weighted_bipred_idc = pps->weighted_bipred_idc;
  ctrl.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
  ctrl.pic_init_qs_minus26 = pps->pic_init_qs_minus26;
  ctrl.chroma_qp_index_offset = pps->chroma_qp_index_offset;
  ctrl.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;

  // Boolean PPS members -> V4L2 flag mask.
  ctrl.flags =
      (pps->entropy_coding_mode_flag ? V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE
                                     : 0) |
      (pps->bottom_field_pic_order_in_frame_present_flag
           ? V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT
           : 0) |
      (pps->weighted_pred_flag ? V4L2_H264_PPS_FLAG_WEIGHTED_PRED : 0) |
      (pps->deblocking_filter_control_present_flag
           ? V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT
           : 0) |
      (pps->constrained_intra_pred_flag
           ? V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED
           : 0) |
      (pps->redundant_pic_cnt_present_flag
           ? V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT
           : 0) |
      (pps->transform_8x8_mode_flag ? V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE
                                    : 0) |
      (pps->pic_scaling_matrix_present_flag
           ? V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT
           : 0);

  return ctrl;
}

// Builds the V4L2 scaling-matrix control.
//
// The scaling lists are taken from |pps| when it sets
// pic_scaling_matrix_present_flag, otherwise from |sps|. Entry |j| of each
// source list is written to position |zigzag_4x4[j]| / |zigzag_8x8[j]| of the
// corresponding V4L2 list. Both pointers must be non-null.
v4l2_ctrl_h264_scaling_matrix SetupScalingMatrix(const H264SPS* sps,
                                                 const H264PPS* pps) {
  struct v4l2_ctrl_h264_scaling_matrix matrix = {};

  // The copy loops below iterate over the V4L2 matrix dimensions, so both the
  // outer (rank 0) and inner (rank 1) dimensions of the source arrays must be
  // at least as large. The inner dimension is queried with rank 1 on the
  // array type itself; decltype of a subscript expression is a reference
  // type, for which std::extent is always 0.
  static_assert(std::extent<decltype(matrix.scaling_list_4x4), 0>() <=
                        std::extent<decltype(pps->scaling_list4x4), 0>() &&
                    std::extent<decltype(matrix.scaling_list_4x4), 1>() <=
                        std::extent<decltype(pps->scaling_list4x4), 1>() &&
                    std::extent<decltype(matrix.scaling_list_8x8), 0>() <=
                        std::extent<decltype(pps->scaling_list8x8), 0>() &&
                    std::extent<decltype(matrix.scaling_list_8x8), 1>() <=
                        std::extent<decltype(pps->scaling_list8x8), 1>(),
                "PPS scaling_lists must be of correct size");

  // Same requirement for the SPS scaling lists.
  static_assert(std::extent<decltype(matrix.scaling_list_4x4), 0>() <=
                        std::extent<decltype(sps->scaling_list4x4), 0>() &&
                    std::extent<decltype(matrix.scaling_list_4x4), 1>() <=
                        std::extent<decltype(sps->scaling_list4x4), 1>() &&
                    std::extent<decltype(matrix.scaling_list_8x8), 0>() <=
                        std::extent<decltype(sps->scaling_list8x8), 0>() &&
                    std::extent<decltype(matrix.scaling_list_8x8), 1>() <=
                        std::extent<decltype(sps->scaling_list8x8), 1>(),
                "SPS scaling_lists must be of correct size");

  // Default to the sequence-level lists; a PPS that carries its own matrix
  // overrides them.
  const auto* scaling_list4x4 = &sps->scaling_list4x4[0];
  const auto* scaling_list8x8 = &sps->scaling_list8x8[0];
  if (pps->pic_scaling_matrix_present_flag) {
    scaling_list4x4 = &pps->scaling_list4x4[0];
    scaling_list8x8 = &pps->scaling_list8x8[0];
  }

  // The zig-zag table must have one entry per element of a 4x4 list.
  static_assert(std::extent<decltype(matrix.scaling_list_4x4), 1>() ==
                std::extent<decltype(zigzag_4x4)>());
  for (size_t i = 0; i < std::size(matrix.scaling_list_4x4); ++i) {
    for (size_t j = 0; j < std::size(matrix.scaling_list_4x4[i]); ++j) {
      matrix.scaling_list_4x4[i][zigzag_4x4[j]] = scaling_list4x4[i][j];
    }
  }

  // The zig-zag table must have one entry per element of an 8x8 list.
  static_assert(std::extent<decltype(matrix.scaling_list_8x8), 1>() ==
                std::extent<decltype(zigzag_8x8)>());
  for (size_t i = 0; i < std::size(matrix.scaling_list_8x8); ++i) {
    for (size_t j = 0; j < std::size(matrix.scaling_list_8x8[i]); ++j) {
      matrix.scaling_list_8x8[i][zigzag_8x8[j]] = scaling_list8x8[i][j];
    }
  }

  return matrix;
}

// Sets up v4l2_ctrl_h264_decode_params from data in the H264SliceHeader and
// the current H264SliceMetadata.
//
// |slice| supplies the syntax elements of the slice being submitted,
// |slice_metadata| the field order counts computed for the current picture,
// and |dpb| the pictures to describe in the V4L2 dpb[] array. At most
// std::size(dpb[]) pictures are described; any further ones are dropped with
// a log message instead of being written past the end of the array.
v4l2_ctrl_h264_decode_params SetupDecodeParams(
    const H264SliceHeader& slice,
    const H264SliceMetadata& slice_metadata,
    const H264DPB& dpb) {
  v4l2_ctrl_h264_decode_params v4l2_decode_params = {};

  v4l2_decode_params.nal_ref_idc = slice.nal_ref_idc;
  v4l2_decode_params.frame_num = slice.frame_num;
  v4l2_decode_params.idr_pic_id = slice.idr_pic_id;
  v4l2_decode_params.pic_order_cnt_lsb = slice.pic_order_cnt_lsb;
  v4l2_decode_params.delta_pic_order_cnt_bottom =
      slice.delta_pic_order_cnt_bottom;
  v4l2_decode_params.delta_pic_order_cnt0 = slice.delta_pic_order_cnt0;
  v4l2_decode_params.delta_pic_order_cnt1 = slice.delta_pic_order_cnt1;
  v4l2_decode_params.dec_ref_pic_marking_bit_size =
      slice.dec_ref_pic_marking_bit_size;
  v4l2_decode_params.pic_order_cnt_bit_size = slice.pic_order_cnt_bit_size;

  v4l2_decode_params.flags = 0;
  if (slice.idr_pic_flag)
    v4l2_decode_params.flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;

  v4l2_decode_params.top_field_order_cnt = slice_metadata.top_field_order_cnt;
  v4l2_decode_params.bottom_field_order_cnt =
      slice_metadata.bottom_field_order_cnt;

  size_t i = 0;
  // NOTE(review): |ref_ts_nsec| is multiplied by 1000 to form the V4L2
  // reference timestamp; presumably this mirrors how buffer timestamps are
  // derived from the frame number elsewhere -- confirm against the shim.
  constexpr size_t kTimestampToNanoSecs = 1000;
  for (const auto& element : dpb) {
    // The V4L2 control has a fixed number of DPB slots; never index past it.
    if (i >= std::size(v4l2_decode_params.dpb)) {
      VLOGF(1) << "DPB holds more pictures than the "
               << std::size(v4l2_decode_params.dpb)
               << " entries V4L2 can describe; ignoring the rest.";
      break;
    }

    struct v4l2_h264_dpb_entry& entry = v4l2_decode_params.dpb[i++];
    entry = {.reference_ts = element.second.ref_ts_nsec * kTimestampToNanoSecs,
             .pic_num = static_cast<unsigned short>(element.second.pic_num),
             .frame_num = static_cast<unsigned short>(element.second.frame_num),
             .fields = V4L2_H264_FRAME_REF,
             .top_field_order_cnt = element.second.top_field_order_cnt,
             .bottom_field_order_cnt = element.second.bottom_field_order_cnt,
             // Every listed entry is valid; ACTIVE and LONG_TERM mirror the
             // picture's reference state.
             .flags = static_cast<uint32_t>(
                 V4L2_H264_DPB_ENTRY_FLAG_VALID |
                 (element.second.ref ? V4L2_H264_DPB_ENTRY_FLAG_ACTIVE : 0) |
                 (element.second.long_term_reference_flag
                      ? V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM
                      : 0))};
  }

  return v4l2_decode_params;
}

// Determines whether the current slice is part of the same
// frame as the previous slice.
// From h264 specification 7.4.1.2.4
bool IsNewFrame(const H264SliceMetadata& curr_picture,
                const H264SliceHeader& curr_slice_hdr,
                const H264SPS& sps) {
  bool nalu_size_error = curr_picture.slice_header.nalu_size < 1;

  bool slice_changed =
      curr_slice_hdr.frame_num != curr_picture.slice_header.frame_num ||
      curr_slice_hdr.pic_parameter_set_id !=
          curr_picture.slice_header.pic_parameter_set_id ||
      curr_slice_hdr.nal_ref_idc != curr_picture.slice_header.nal_ref_idc ||
      curr_slice_hdr.idr_pic_flag != curr_picture.slice_header.idr_pic_flag ||
      curr_slice_hdr.idr_pic_id != curr_picture.slice_header.idr_pic_id;

  bool slice_pic_order_changed = false;

  if (sps.pic_order_cnt_type == 0) {
    slice_pic_order_changed =
        curr_slice_hdr.pic_order_cnt_lsb !=
            curr_picture.slice_header.pic_order_cnt_lsb ||
        curr_slice_hdr.delta_pic_order_cnt_bottom !=
            curr_picture.slice_header.delta_pic_order_cnt_bottom;

  } else if (sps.pic_order_cnt_type == 1) {
    slice_pic_order_changed =
        curr_slice_hdr.delta_pic_order_cnt0 !=
            curr_picture.slice_header.delta_pic_order_cnt0 ||
        curr_slice_hdr.delta_pic_order_cnt1 !=
            curr_picture.slice_header.delta_pic_order_cnt1;
  }

  return (nalu_size_error || slice_changed || slice_pic_order_changed);
}

// Returns MaxDpbMbs, the maximum decoded picture buffer size in macroblocks,
// for the given |level| (an H264SPS::kLevelIDC* value).
// Values are taken from the level limits table in Annex A of the H.264
// specification. Returns 0 for levels that are not listed.
uint32_t GetMaxDPBMBS(uint8_t level) {
  switch (level) {
    case H264SPS::kLevelIDC1p0:
      return 396;  // Level 1.0
    case H264SPS::kLevelIDC1B:
      return 396;  // Level 1b
    case H264SPS::kLevelIDC1p1:
      return 900;  // Level 1.1
    case H264SPS::kLevelIDC1p2:
      return 2376;  // Level 1.2
    case H264SPS::kLevelIDC1p3:
      return 2376;  // Level 1.3
    case H264SPS::kLevelIDC2p0:
      return 2376;  // Level 2.0
    case H264SPS::kLevelIDC2p1:
      return 4752;  // Level 2.1
    case H264SPS::kLevelIDC2p2:
      return 8100;  // Level 2.2
    case H264SPS::kLevelIDC3p0:
      return 8100;  // Level 3.0
    case H264SPS::kLevelIDC3p1:
      return 18000;  // Level 3.1
    case H264SPS::kLevelIDC3p2:
      return 20480;  // Level 3.2
    case H264SPS::kLevelIDC4p0:
      return 32768;  // Level 4.0
    case H264SPS::kLevelIDC4p1:
      return 32768;  // Level 4.1
    case H264SPS::kLevelIDC4p2:
      return 34816;  // Level 4.2
    case H264SPS::kLevelIDC5p0:
      return 110400;  // Level 5.0
    case H264SPS::kLevelIDC5p1:
      return 184320;  // Level 5.1
    case H264SPS::kLevelIDC5p2:
      // Level 5.2 shares the Level 5.1 limit; it must not fall through to the
      // "unknown level" result.
      return 184320;  // Level 5.2
    default:
      return 0;
  }
}

}  // namespace

void H264Decoder::ProcessSPS(const int sps_id) {
  const H264SPS* sps = parser_->GetSPS(sps_id);
  gfx::Size new_pic_size = sps->GetCodedSize().value_or(gfx::Size());

  int width_mb = new_pic_size.width() / 16;
  int height_mb = new_pic_size.height() / 16;

  // Spec A.3.1 and A.3.2
  // For Baseline, Constrained Baseline and Main profile, the indicated level is
  // Level 1b if level_idc is equal to 11 and constraint_set3_flag is equal to 1
  uint8_t level = base::checked_cast<uint8_t>(sps->level_idc);
  if ((sps->profile_idc == H264SPS::kProfileIDCBaseline ||
       sps->profile_idc == H264SPS::kProfileIDCConstrainedBaseline ||
       sps->profile_idc == H264SPS::kProfileIDCMain) &&
      level == 11 && sps->constraint_set3_flag) {
    level = 9;  // Level 1b
  }
  int max_dpb_mbs = base::checked_cast<int>(GetMaxDPBMBS(level));

  // MaxDpbFrames from level limits per spec.
  size_t max_dpb_frames = std::min(max_dpb_mbs / (width_mb * height_mb), 16);

  size_t max_dpb_size =
      std::max(static_cast<int>(max_dpb_frames),
               std::max(sps->max_num_ref_frames, sps->max_dec_frame_buffering));

  VideoCodecProfile new_profile =
      H264Parser::ProfileIDCToVideoCodecProfile(sps->profile_idc);
  uint8_t new_bit_depth = 0;
  ParseBitDepth(*sps, new_bit_depth);

  if (sps->vui_parameters_present_flag && sps->bitstream_restriction_flag) {
    max_num_reorder_frames_ =
        base::checked_cast<size_t>(sps->max_num_reorder_frames);
  } else if (sps->constraint_set3_flag) {
    // max_num_reorder_frames not present, infer from profile/constraints
    // (see VUI semantics in spec).
    switch (sps->profile_idc) {
      case 44:
      case 86:
      case 100:
      case 110:
      case 122:
      case 244:
        max_num_reorder_frames_ = 0;
        break;
      default:
        max_num_reorder_frames_ = max_dpb_size;
        break;
    }
  } else {
    max_num_reorder_frames_ = max_dpb_size;
  }

  if (pic_size_ != new_pic_size || dpb_.max_dpb_size_ != max_dpb_size ||
      profile_ != new_profile || bit_depth_ != new_bit_depth) {
    FlushDPB();
    profile_ = new_profile;
    bit_depth_ = new_bit_depth;
    pic_size_ = new_pic_size;
    dpb_.max_dpb_size_ = max_dpb_size;
  }
}

void H264Decoder::FlushDPB() {
  std::vector<H264SliceMetadata*> transmittable_slices =
      dpb_.GetNotOutputtedPicsAppending();
  std::sort(transmittable_slices.begin(), transmittable_slices.end(),
            H264PicOrderCompare());

  for (auto* i : transmittable_slices) {
    i->outputted = true;
    slice_ready_queue_.push(*i);
  }

  dpb_.clear();
}

void H264Decoder::InitializeDecoderLogic() {
  parser_ = std::make_unique<H264Parser>();
  parser_->SetStream(data_stream_->data(), data_stream_->length());

  // Advance through NALUs until the first SPS.  The start of the decodable
  // data in an h.264 bistreams starts with an SPS.
  while (true) {
    H264NALU nalu;
    H264Parser::Result res = parser_->AdvanceToNextNALU(&nalu);
    CHECK(res == H264Parser::kOk);

    if (nalu.nal_unit_type == H264NALU::kSPS) {
      break;
    }
  }

  int sps_id;
  H264Parser::Result res = parser_->ParseSPS(&sps_id);
  CHECK(res == H264Parser::kOk);

  // Process initial SPS in bitstream and navigate to first slice in bitstream
  // to setup ProcessNextFrame for decoding.
  ProcessSPS(sps_id);
  std::unique_ptr<H264NALU> curr_nalu;
  while (true) {
    curr_nalu = std::make_unique<H264NALU>();
    if (parser_->AdvanceToNextNALU(curr_nalu.get()) == H264Parser::kEOStream) {
      break;
    }

    if (curr_nalu->nal_unit_type == H264NALU::kIDRSlice ||
        curr_nalu->nal_unit_type == H264NALU::kNonIDRSlice) {
      break;
    } else if (curr_nalu->nal_unit_type == H264NALU::kPPS) {
      int pps_id;
      CHECK(parser_->ParsePPS(&pps_id) == H264Parser::kOk);
    }
  }

  curr_slice_hdr_ = std::make_unique<H264SliceHeader>();
  CHECK(parser_->ParseSliceHeader(*curr_nalu, curr_slice_hdr_.get()) ==
        H264Parser::kOk);
}

// Copies the current slice NALU, prefixed with a start code, into OUTPUT
// buffer 0 and queues that buffer. Returns kError if queueing fails; on
// success |global_pic_count_| is incremented.
VideoDecoder::Result H264Decoder::SubmitSlice() {
  constexpr uint32_t kOutputBufferIndex = 0;

  // Start code prefix: a zero-filled buffer whose last byte is set to the
  // start code value. NOTE(review): the length relies on
  // sizeof(V4L2_STATELESS_H264_START_CODE_ANNEX_B) - 1, presumably 3 bytes for
  // a 4-byte enum -- confirm on the target ABI.
  std::vector<uint8_t> annex_b_slice(
      sizeof(V4L2_STATELESS_H264_START_CODE_ANNEX_B) - 1);
  annex_b_slice[2] = V4L2_STATELESS_H264_START_CODE_ANNEX_B;

  // Append the raw NALU bytes after the prefix.
  const auto* const nalu_begin = (curr_slice_hdr_->nalu_data).get();
  const auto* const nalu_end =
      (curr_slice_hdr_->nalu_data +
       base::checked_cast<size_t>(curr_slice_hdr_->nalu_size))
          .get();
  annex_b_slice.insert(annex_b_slice.end(), nalu_begin, nalu_end);

  scoped_refptr<MmappedBuffer> output_buffer =
      OUTPUT_queue_->GetBuffer(kOutputBufferIndex);
  output_buffer->mmapped_planes()[0].CopyIn(annex_b_slice.data(),
                                            annex_b_slice.size());
  output_buffer->set_frame_number(global_pic_count_);

  const bool queued = v4l2_ioctl_->QBuf(OUTPUT_queue_, kOutputBufferIndex);
  if (!queued) {
    VLOG(4) << "VIDIOC_QBUF failed for OUTPUT queue.";
    return VideoDecoder::kError;
  }

  ++global_pic_count_;
  return VideoDecoder::kOk;
}

// Fills |slice_metadata| for the picture that starts with |slice_hdr|.
//
// Copies the slice header and its identifying fields, records the visible
// rectangle from |sps|, and computes the top/bottom field order counts and
// |pic_order_cnt| according to |sps|->pic_order_cnt_type.
//
// Returns kError when |sps| is null or the picture order count type is not
// 0, 1 or 2; kOk otherwise. |slice_metadata| must be non-null. A stream whose
// SPS has no visible rectangle is treated as fatal (CHECK).
VideoDecoder::Result H264Decoder::InitializeSliceMetadata(
    const H264SliceHeader& slice_hdr,
    const H264SPS* sps,
    H264SliceMetadata* slice_metadata) const {
  if (!sps) {
    return VideoDecoder::kError;
  }

  slice_metadata->slice_header = slice_hdr;
  // The running picture counter doubles as the picture's reference timestamp
  // (and as its key when it is later inserted into the DPB).
  slice_metadata->ref_ts_nsec = global_pic_count_;
  slice_metadata->ref = slice_hdr.nal_ref_idc != 0;
  slice_metadata->frame_num = slice_hdr.frame_num;
  slice_metadata->pic_num = slice_hdr.frame_num;
  slice_metadata->pic_order_cnt_lsb = slice_hdr.pic_order_cnt_lsb;

  const auto visible_rect = sps->GetVisibleRect();
  // If there is no value, then the bitstream is invalid
  CHECK(visible_rect.has_value());
  slice_metadata->visible_rect_ = *visible_rect;

  slice_metadata->long_term_reference_flag = slice_hdr.long_term_reference_flag;

  // The memory management operations are only meaningful (and only copied)
  // when the slice uses adaptive reference picture marking.
  if (slice_hdr.adaptive_ref_pic_marking_mode_flag) {
    static_assert(sizeof(slice_metadata->ref_pic_marking) ==
                      sizeof(slice_hdr.ref_pic_marking),
                  "Array sizes of ref pic marking do not match.");
    memcpy(slice_metadata->ref_pic_marking, slice_hdr.ref_pic_marking,
           sizeof(slice_metadata->ref_pic_marking));
  }

  // Calculate H264 slice order counts.
  switch (sps->pic_order_cnt_type) {
    // See specification 8.2.1.1.
    case 0: {
      // An IDR picture restarts the count; otherwise continue from the
      // values recorded for the previous reference picture.
      int prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb;
      if (slice_hdr.idr_pic_flag) {
        prev_pic_order_cnt_msb = prev_pic_order_cnt_lsb = 0;
      } else {
        prev_pic_order_cnt_msb = prev_pic_order_.prev_ref_pic_order_cnt_msb;
        prev_pic_order_cnt_lsb = prev_pic_order_.prev_ref_pic_order_cnt_lsb;
      }

      // Detect wrap-around of the LSB: a jump of at least half the LSB range
      // downwards means the MSB advanced, a jump of more than half upwards
      // means it went back.
      const int max_pic_order_cnt_lsb =
          1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4);
      if ((slice_metadata->pic_order_cnt_lsb < prev_pic_order_cnt_lsb) &&
          (prev_pic_order_cnt_lsb - slice_metadata->pic_order_cnt_lsb >=
           max_pic_order_cnt_lsb / 2)) {
        slice_metadata->pic_order_cnt_msb =
            prev_pic_order_cnt_msb + max_pic_order_cnt_lsb;
      } else if ((slice_metadata->pic_order_cnt_lsb > prev_pic_order_cnt_lsb) &&
                 (slice_metadata->pic_order_cnt_lsb - prev_pic_order_cnt_lsb >
                  max_pic_order_cnt_lsb / 2)) {
        slice_metadata->pic_order_cnt_msb =
            prev_pic_order_cnt_msb - max_pic_order_cnt_lsb;
      } else {
        slice_metadata->pic_order_cnt_msb = prev_pic_order_cnt_msb;
      }

      slice_metadata->top_field_order_cnt =
          slice_metadata->pic_order_cnt_msb + slice_metadata->pic_order_cnt_lsb;
      slice_metadata->bottom_field_order_cnt =
          slice_metadata->top_field_order_cnt +
          slice_hdr.delta_pic_order_cnt_bottom;
      break;
    }
    case 1: {
      // TODO(b/234752983): Implement pic ordering for pic order count type 1
      // as defined in H.264 section 8.2.1.2.
      // Until then the field order counts keep whatever values the caller
      // left in |slice_metadata|.
      break;
    }
    case 2: {
      // Implements pic ordering for pic order count type 2 as defined
      // in H.264 section 8.2.1.3.
      // frame_num wrapped around when the previous frame number is larger
      // than the current one; account for it in the running offset.
      if (slice_metadata->slice_header.idr_pic_flag) {
        slice_metadata->frame_num_offset = 0;
      } else if (prev_frame_num_ > slice_metadata->pic_num) {
        slice_metadata->frame_num_offset =
            prev_frame_num_offset_ +
            (1 << (sps->log2_max_frame_num_minus4 + 4));
      } else {
        slice_metadata->frame_num_offset = prev_frame_num_offset_;
      }

      // Non-reference pictures get the odd count just below the even count a
      // reference picture with the same frame number would get.
      int temp_pic_order_cnt;
      if (slice_metadata->slice_header.idr_pic_flag) {
        temp_pic_order_cnt = 0;
      } else if (!slice_metadata->slice_header.nal_ref_idc) {
        temp_pic_order_cnt =
            2 * (slice_metadata->frame_num_offset + slice_metadata->frame_num) -
            1;
      } else {
        temp_pic_order_cnt =
            2 * (slice_metadata->frame_num_offset + slice_metadata->frame_num);
      }

      slice_metadata->top_field_order_cnt = temp_pic_order_cnt;
      slice_metadata->bottom_field_order_cnt = temp_pic_order_cnt;
      break;
    }
    default: {
      VLOGF(4) << "Invalid pic_order_cnt_type: " << sps->pic_order_cnt_type;
      return VideoDecoder::kError;
    }
  }

  // The picture's order count is the smaller of its two field order counts.
  slice_metadata->pic_order_cnt =
      std::min(slice_metadata->top_field_order_cnt,
               slice_metadata->bottom_field_order_cnt);

  return VideoDecoder::kOk;
}

// Prepares decoding of the frame that starts with |curr_slice_hdr_|.
//
// Initializes |slice_metadata| for the frame, handles the DPB reset required
// by an IDR picture, updates the picture numbers in the DPB and sends the
// SPS, PPS and scaling matrix controls to the driver.
// |is_OUTPUT_queue_new| is forwarded to V4L2IoctlShim::SetExtCtrls().
//
// Returns kError if the slice refers to a PPS or SPS the parser does not know
// or if the slice metadata cannot be initialized; kOk otherwise.
VideoDecoder::Result H264Decoder::StartNewFrame(
    bool is_OUTPUT_queue_new,
    H264SliceMetadata* slice_metadata) {
  const H264PPS* pps = parser_->GetPPS(curr_slice_hdr_->pic_parameter_set_id);
  if (!pps) {
    // A slice may reference a PPS id that never appeared in the stream.
    LOG(ERROR) << "Slice references unknown PPS id "
               << curr_slice_hdr_->pic_parameter_set_id;
    return VideoDecoder::kError;
  }
  // A null |sps| is rejected by InitializeSliceMetadata() below.
  const H264SPS* sps = parser_->GetSPS(pps->seq_parameter_set_id);

  if (InitializeSliceMetadata(*curr_slice_hdr_, sps, slice_metadata) ==
      VideoDecoder::kError) {
    return VideoDecoder::kError;
  }

  // An IDR picture empties the DPB. Unless the stream asks for prior pictures
  // to be discarded, output them first.
  if (curr_slice_hdr_->idr_pic_flag) {
    if (!curr_slice_hdr_->no_output_of_prior_pics_flag) {
      FlushDPB();
    }
    dpb_.clear();
  }

  const int max_frame_num = 1 << (sps->log2_max_frame_num_minus4 + 4);
  dpb_.UpdatePicNums(curr_slice_hdr_->frame_num, max_frame_num);

  struct v4l2_ctrl_h264_sps v4l2_sps = SetupSPSCtrl(sps);
  struct v4l2_ctrl_h264_pps v4l2_pps = SetupPPSCtrl(pps);
  struct v4l2_ctrl_h264_scaling_matrix v4l2_matrix =
      SetupScalingMatrix(sps, pps);

  struct v4l2_ext_control ctrls[] = {
      {.id = V4L2_CID_STATELESS_H264_SPS,
       .size = sizeof(v4l2_sps),
       .ptr = &v4l2_sps},
      {.id = V4L2_CID_STATELESS_H264_PPS,
       .size = sizeof(v4l2_pps),
       .ptr = &v4l2_pps},
      {.id = V4L2_CID_STATELESS_H264_SCALING_MATRIX,
       .size = sizeof(v4l2_matrix),
       .ptr = &v4l2_matrix}};
  struct v4l2_ext_controls ext_ctrls = {
      .count = (sizeof(ctrls) / sizeof(ctrls[0])), .controls = ctrls};

  v4l2_ioctl_->SetExtCtrls(OUTPUT_queue_, &ext_ctrls, is_OUTPUT_queue_new);

  return VideoDecoder::kOk;
}

void H264Decoder::ProcessNextFrame() {
  H264SliceMetadata slice_metadata = {};

  const bool is_OUTPUT_queue_new = !OUTPUT_queue_;
  if (!OUTPUT_queue_) {
    CreateOUTPUTQueue(kDriverCodecFourcc);
  }

  StartNewFrame(is_OUTPUT_queue_new, &slice_metadata);
  v4l2_ctrl_h264_decode_params v4l2_decode_params =
      SetupDecodeParams(*curr_slice_hdr_, slice_metadata, dpb_);

  const int pps_id = curr_slice_hdr_->pic_parameter_set_id;
  const int sps_id = parser_->GetPPS(pps_id)->seq_parameter_set_id;

  struct v4l2_ext_control ctrls[] = {
      {.id = V4L2_CID_STATELESS_H264_DECODE_PARAMS,
       .size = sizeof(v4l2_decode_params),
       .ptr = &v4l2_decode_params},
      {.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
       .value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED}};
  struct v4l2_ext_controls ext_ctrls = {
      .count = (sizeof(ctrls) / sizeof(ctrls[0])), .controls = ctrls};

  v4l2_ioctl_->SetExtCtrls(OUTPUT_queue_, &ext_ctrls);

  SubmitSlice();

  while (true) {
    std::unique_ptr<H264NALU> curr_nalu = std::make_unique<H264NALU>();
    if (parser_->AdvanceToNextNALU(curr_nalu.get()) == H264Parser::kEOStream) {
      stream_finished_ = true;
      break;
    }

    if (curr_nalu->nal_unit_type == H264NALU::kNonIDRSlice ||
        curr_nalu->nal_unit_type == H264NALU::kIDRSlice) {
      curr_slice_hdr_ = std::make_unique<H264SliceHeader>();
      CHECK(parser_->ParseSliceHeader(*curr_nalu, curr_slice_hdr_.get()) ==
            H264Parser::kOk);

      const H264SPS* sps = parser_->GetSPS(sps_id);
      if (IsNewFrame(slice_metadata, *curr_slice_hdr_.get(), *sps)) {
        break;
      }

    } else if (curr_nalu->nal_unit_type == H264NALU::kSPS) {
      int sps_info;
      H264Parser::Result res = parser_->ParseSPS(&sps_info);
      CHECK(res == H264Parser::kOk);
      ProcessSPS(sps_id);

    } else if (curr_nalu->nal_unit_type == H264NALU::kPPS) {
      int pps_info;
      H264Parser::Result res = parser_->ParsePPS(&pps_info);
      CHECK(res == H264Parser::kOk);
    }
    // All other NALU's can be safely dropped/ignored.
  }

  FinishPicture(slice_metadata, sps_id);

  if (stream_finished_) {
    FlushDPB();
  }
}

// Completes decoding of |picture|, whose slice data has already been
// submitted to the OUTPUT queue.
//
// Steps, in order:
//  1. Queue the media request, wait for it to complete and dequeue the
//     decoded CAPTURE buffer, recording its id in |picture|.
//  2. Re-queue CAPTURE buffers that are no longer needed.
//  3. Run reference picture marking for |picture| (IDR, adaptive memory
//     management, or sliding window, using the SPS |sps_id|).
//  4. Output as many pictures as the reordering constraints allow and insert
//     |picture| into the DPB if it must be kept.
//  5. Dequeue the OUTPUT buffer and re-initialize the media request.
void H264Decoder::FinishPicture(H264SliceMetadata picture, const int sps_id) {
  v4l2_ioctl_->MediaRequestIocQueue(OUTPUT_queue_);

  // The CAPTURE queue is created lazily, once the first request is queued.
  if (!CAPTURE_queue_) {
    CreateCAPTUREQueue(kNumberOfBuffersInCaptureQueue);
  }

  v4l2_ioctl_->WaitForRequestCompletion(OUTPUT_queue_);

  uint32_t CAPTURE_id = 0;
  v4l2_ioctl_->DQBuf(CAPTURE_queue_, &CAPTURE_id);

  CAPTURE_queue_->DequeueBufferId(CAPTURE_id);
  picture.capture_queue_buffer_id = CAPTURE_id;

  const std::set<uint32_t> reusable_buffer_slots =
      GetReusableReferenceSlots(*CAPTURE_queue_->GetBuffer(CAPTURE_id).get(),
                                CAPTURE_queue_->queued_buffer_ids());

  for (const auto reusable_buffer_slot : reusable_buffer_slots) {
    if (!v4l2_ioctl_->QBuf(CAPTURE_queue_, reusable_buffer_slot)) {
      VLOGF(4) << "VIDIOC_QBUF failed for CAPTURE queue.";
      // The buffer was not handed to the driver, so it must not be recorded
      // as queued; otherwise it would never be offered for re-queueing again.
      continue;
    }
    // Keeps track of which indices are currently queued in the
    // CAPTURE queue. This will be used to determine which indices
    // can/cannot be refreshed.
    CAPTURE_queue_->QueueBufferId(reusable_buffer_slot);
  }

  if (picture.ref) {
    // If picture is an IDR, need to unmark all unused reference pics.
    // H.264 section 8.2.4.1.2.
    if (picture.slice_header.idr_pic_flag) {
      dpb_.MarkAllUnusedRef();
      if (picture.long_term_reference_flag) {
        picture.long_term_frame_idx = 0;
      }

    } else if (picture.slice_header.adaptive_ref_pic_marking_mode_flag) {
      for (size_t i = 0; i < std::size(picture.ref_pic_marking); ++i) {
        H264DecRefPicMarking* ref_pic_marking = &picture.ref_pic_marking[i];

        // Handle Memory Mgmt operations as specified in specification 8.2.5.4.
        switch (ref_pic_marking->memory_mgmnt_control_operation) {
          case 0:
            break;

          case 1: {
            // Unmark the short-term picture identified by its distance from
            // the current picture number.
            const int pic_num_x =
                picture.pic_num -
                (ref_pic_marking->difference_of_pic_nums_minus1 + 1);
            dpb_.UnmarkPicByPicNum(pic_num_x);
            break;
          }

          case 2: {
            dpb_.UnmarkLongTerm(ref_pic_marking->long_term_pic_num);
            break;
          }

          case 3: {
            // H.264 section 8.2.5.4.3
            // Turn a short-term picture into a long-term one, displacing any
            // picture that currently holds the same long-term frame index.
            const int pic_num_x =
                picture.pic_num -
                (ref_pic_marking->difference_of_pic_nums_minus1 + 1);
            H264SliceMetadata* short_pic =
                dpb_.GetShortRefPicByPicNum(pic_num_x);
            if (short_pic) {
              H264SliceMetadata* long_term_mark = dpb_.GetLongRefPicByFrameIdx(
                  ref_pic_marking->long_term_frame_idx);

              if (long_term_mark) {
                long_term_mark->ref = false;
              }

              short_pic->long_term_reference_flag = true;
              short_pic->long_term_frame_idx =
                  ref_pic_marking->long_term_frame_idx;
            }
            break;
          }

          case 4: {
            const int max_long_term_frame_idx =
                ref_pic_marking->max_long_term_frame_idx_plus1 - 1;
            dpb_.UnmarkLongTermPicsGreaterThanFrameIndex(
                max_long_term_frame_idx);
            break;
          }

          default:
            // Other operations are not handled here.
            break;
        }
      }
    } else {
      // Use a sliding window method decoded reference picture marking process
      // H.264 section 8.2.4.3.
      const H264SPS* sps = parser_->GetSPS(sps_id);
      CHECK(sps);
      int num_ref_pics = dpb_.CountRefPics();
      if (num_ref_pics == std::max<int>(sps->max_num_ref_frames, 1)) {
        dpb_.UnmarkLowestFrameNumWrapShortRefPic();
      }
    }

    // Remembered for the picture order count derivation of the next picture.
    prev_pic_order_.prev_ref_pic_order_cnt_msb = picture.pic_order_cnt_msb;
    prev_pic_order_.prev_ref_pic_order_cnt_lsb = picture.pic_order_cnt_lsb;
  }

  prev_frame_num_ = picture.frame_num;
  prev_frame_num_offset_ = picture.frame_num_offset;

  dpb_.DeleteUnused();

  std::vector<H264SliceMetadata*> transmittable_slices =
      dpb_.GetNotOutputtedPicsAppending();
  // Include the current slice metadata to the list of transmittable slices.
  transmittable_slices.push_back(&picture);

  std::sort(transmittable_slices.begin(), transmittable_slices.end(),
            H264PicOrderCompare());

  auto output_candidate = transmittable_slices.begin();
  size_t slices_remaining = transmittable_slices.size();

  // Tries to output as many pictures as we can. A picture can be output,
  // if the number of decoded and not yet outputted pictures that would remain
  // in DPB afterwards would at least be equal to |max_num_reorder_frames|.
  while (output_candidate != transmittable_slices.end() &&
         (slices_remaining > max_num_reorder_frames_ ||
          // If the DPB is full and the output candidate has not been
          // outputted or is a reference picture then output this picture.
          (dpb_.size() == dpb_.max_dpb_size_ &&
           ((!(*output_candidate)->outputted || (*output_candidate)->ref)) &&
           slices_remaining))) {
    DVLOG_IF(1, slices_remaining <= max_num_reorder_frames_)
        << "Invalid stream: max_num_reorder_frames not preserved";

    (*output_candidate)->outputted = true;
    slice_ready_queue_.push(**output_candidate);

    // If the outputted picture is not a reference picture, it doesn't have
    // to remain in the DPB and can be removed.
    if (!(*output_candidate)->ref) {
      // Current picture hasn't been inserted into DPB yet, so don't remove it
      // if we managed to output it immediately.
      if ((*output_candidate)->ref_ts_nsec != picture.ref_ts_nsec) {
        dpb_.Delete(**output_candidate);
      }
    }

    ++output_candidate;
    --slices_remaining;
  }

  // The current picture is kept if it still has to be output or is needed as
  // a reference.
  if (!picture.outputted || picture.ref) {
    dpb_.insert({picture.ref_ts_nsec, picture});
  }

  uint32_t OUTPUT_queue_buffer_id = 0;
  v4l2_ioctl_->DQBuf(OUTPUT_queue_, &OUTPUT_queue_buffer_id);
  v4l2_ioctl_->MediaRequestIocReinit(OUTPUT_queue_);
}

// static
// Builds an H264Decoder for |stream|. The bitstream is scanned for its first
// SPS, and the coded size read from that SPS is handed to the decoder's
// constructor. Returns nullptr when no SPS NALU can be reached; CHECK-fails
// when the SPS cannot be parsed or yields no coded size.
std::unique_ptr<H264Decoder> H264Decoder::Create(
    const base::MemoryMappedFile& stream) {
  auto sps_scanner = std::make_unique<H264Parser>();
  sps_scanner->SetStream(stream.data(), stream.length());

  // The decodable data in an H.264 bitstream starts with an SPS, so skip
  // every NALU that precedes the first one.
  for (bool found_sps = false; !found_sps;) {
    H264NALU next_nalu;
    if (sps_scanner->AdvanceToNextNALU(&next_nalu) != H264Parser::kOk) {
      LOG(ERROR) << "Unable to find SPS in stream";
      return nullptr;
    }
    found_sps = next_nalu.nal_unit_type == H264NALU::kSPS;
  }

  // The scanner is now positioned on the SPS; parse it and look it up by id.
  int sps_id;
  H264Parser::Result res = sps_scanner->ParseSPS(&sps_id);
  CHECK(res == H264Parser::kOk);

  const H264SPS* sps = sps_scanner->GetSPS(sps_id);
  CHECK(sps);

  std::optional<gfx::Size> coded_size = sps->GetCodedSize();
  CHECK(coded_size);

  // |coded_size| is known to hold a value here because of the CHECK above.
  return base::WrapUnique(
      new H264Decoder(std::make_unique<V4L2IoctlShim>(kDriverCodecFourcc),
                      *coded_size, stream));
}

// Forwards |v4l2_ioctl| and |display_resolution| to the VideoDecoder base and
// initializes |data_stream_| from |data_stream|. The decoder starts with no
// current slice header and with the stream not yet marked as finished.
H264Decoder::H264Decoder(std::unique_ptr<V4L2IoctlShim> v4l2_ioctl,
                         gfx::Size display_resolution,
                         const base::MemoryMappedFile& data_stream)
    : VideoDecoder(std::move(v4l2_ioctl), display_resolution),
      curr_slice_hdr_(nullptr),
      stream_finished_(false),
      data_stream_(data_stream) {}

// Defaulted destructor: nothing beyond the implicit destruction of members and
// the base class is performed.
H264Decoder::~H264Decoder() = default;

std::set<uint32_t> H264Decoder::GetReusableReferenceSlots(
    const MmappedBuffer& buffer,
    std::set<uint32_t> queued_buffer_ids) {
  std::set<uint32_t> reusable_buffer_slots = {};
  const std::set<int> dpb_ids = dpb_.GetHeldCaptureIds();
  for (size_t i = 0; i < CAPTURE_queue_->num_buffers(); i++) {
    // Check that index is not currently queued in the CAPTURE queue and
    // that it is not the same buffer index previously written to.
    if (!queued_buffer_ids.count(i) && i != buffer.buffer_id()) {
      if (dpb_ids.find(i) == dpb_ids.end()) {
        reusable_buffer_slots.insert(i);
      }
    }
  }
  return reusable_buffer_slots;
}

// Decodes until a picture is ready for output, then converts that picture to
// YUV.
//
// |frame_number| is not used by this implementation. On success the picture's
// visible size is written to |size|; |y_plane|, |u_plane| and |v_plane| are
// passed to ConvertToYUV() together with the picture's CAPTURE buffer planes;
// the value ConvertToYUV() returns is written to |bit_depth|; and kOk is
// returned. Returns kEOStream once the stream is finished and no picture
// remains queued for output.
VideoDecoder::Result H264Decoder::DecodeNextFrame(const int frame_number,
                                                  std::vector<uint8_t>& y_plane,
                                                  std::vector<uint8_t>& u_plane,
                                                  std::vector<uint8_t>& v_plane,
                                                  gfx::Size& size,
                                                  BitDepth& bit_depth) {
  // If this is the start of the Decoder, initialize Decoder state.
  if (!parser_) {
    InitializeDecoderLogic();
  }

  // Keep decoding until either decoder has parsed entire bitstream or there is
  // a decoded frame ready.
  while (!stream_finished_ && slice_ready_queue_.empty()) {
    ProcessNextFrame();
  }

  // The loop above only exits when the stream is finished or a picture is
  // queued, so an empty queue at this point always means end of stream. No
  // further "queue is empty" check is needed past this return.
  if (slice_ready_queue_.empty()) {
    return VideoDecoder::kEOStream;
  }

  // Bind to the queued entry instead of copying it: it is only read below and
  // stays in the queue until the pop() at the end of this function.
  const H264SliceMetadata& picture = slice_ready_queue_.front();
  last_decoded_frame_visible_ = picture.outputted;
  scoped_refptr<MmappedBuffer> buffer =
      CAPTURE_queue_->GetBuffer(picture.capture_queue_buffer_id);
  size = picture.visible_rect_.size();

  if (!picture.visible_rect_.origin().IsOrigin()) {
    // TODO(b/315491484): Handle cropping with non-zero origin
    LOG(INFO) << "Non-zero visible rect origin.";
  }

  bit_depth =
      ConvertToYUV(y_plane, u_plane, v_plane, size, buffer->mmapped_planes(),
                   CAPTURE_queue_->resolution(), CAPTURE_queue_->fourcc());

  // |picture| must not be used past this point; pop() destroys the entry.
  slice_ready_queue_.pop();
  return VideoDecoder::kOk;
}

}  // namespace v4l2_test
}  // namespace media