chromium/media/gpu/v4l2/v4l2_stateful_video_decoder.cc

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/gpu/v4l2/v4l2_stateful_video_decoder.h"

#include <fcntl.h>
#include <libdrm/drm_fourcc.h>
#include <poll.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

#include "base/containers/contains.h"
#include "base/containers/heap_array.h"
#include "base/files/file_util.h"
#include "base/location.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_macros.h"
#include "base/posix/eintr_wrapper.h"
#include "base/task/bind_post_task.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/thread_pool.h"
#include "base/trace_event/trace_event.h"
#include "media/base/media_log.h"
#include "media/base/media_switches.h"
#include "media/gpu/chromeos/video_frame_resource.h"
#include "media/gpu/macros.h"
#include "media/gpu/v4l2/v4l2_framerate_control.h"
#include "media/gpu/v4l2/v4l2_queue.h"
#include "media/gpu/v4l2/v4l2_utils.h"
#include "media/parsers/h264_parser.h"
#include "third_party/abseil-cpp/absl/cleanup/cleanup.h"
#include "ui/gfx/geometry/size.h"

namespace {
// Numerical value of ioctl()'s OK return value.
constexpr int kIoctlOk = 0;

int HandledIoctl(int fd, int request, void* arg) {
  return HANDLE_EINTR(ioctl(fd, request, arg));
}

// Wraps mmap() with |fd| as the first parameter so that base::BindRepeating()
// can curry it (see the |mmap_cb| bindings below).
void* Mmap(int fd,
           void* addr,
           unsigned int len,
           int prot,
           int flags,
           unsigned int offset) {
  return mmap(addr, len, prot, flags, fd, offset);
}

// This function blocks waiting for an event from either |device_fd| or
// |wake_event|; if the event comes from |device_fd| and is of type POLLIN
// (meaning there's data) or POLLPRI (meaning a resolution change event), it
// calls |dequeue_callback| or |resolution_change_callback|, respectively.
// Since it blocks, it needs to run on its own SingleThreadTaskRunner, in this
// case |event_task_runner_|.
// TODO(mcasas): Add an error callback too.
void WaitOnceForEvents(int device_fd,
                       int wake_event,
                       base::OnceClosure dequeue_callback,
                       base::OnceClosure resolution_change_callback) {
  VLOGF(5) << "Going to poll()";

  // POLLERR, POLLHUP, and POLLNVAL can always be returned in revents, and are
  // ignored if set in pollfd.events.
  // https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/func-poll.html
  struct pollfd pollfds[] = {{.fd = device_fd, .events = POLLIN | POLLPRI},
                             {.fd = wake_event, .events = POLLIN}};
  constexpr int kInfiniteTimeout = -1;
  if (HANDLE_EINTR(poll(pollfds, std::size(pollfds), kInfiniteTimeout)) <
      kIoctlOk) {
    PLOG(ERROR) << "Poll()ing for events failed";
    return;
  }

  const auto events_from_device = pollfds[0].revents;
  const auto other_events = pollfds[1].revents;
  // At least Qualcomm Venus likes to bundle events.
  const auto pollin_or_pollpri_event = events_from_device & (POLLIN | POLLPRI);
  if (pollin_or_pollpri_event) {
    // "POLLIN There is data to read."
    //  https://man7.org/linux/man-pages/man2/poll.2.html
    if (events_from_device & POLLIN) {
      std::move(dequeue_callback).Run();
    }
    // "If an event occurred (see ioctl VIDIOC_DQEVENT) then POLLPRI will be set
    //  in the revents field and poll() will return."
    // https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/func-poll.html
    if (events_from_device & POLLPRI) {
      VLOGF(2) << "Resolution change event";

      // Dequeue the event, otherwise it'll be stuck in the driver forever.
      struct v4l2_event event;
      memset(&event, 0, sizeof(event));  // Must zero: v4l2_event has a union.
      if (HandledIoctl(device_fd, VIDIOC_DQEVENT, &event) != kIoctlOk) {
        PLOG(ERROR) << "Failed dequeing an event";
        return;
      }
      // If we get an event, it must be a V4L2_EVENT_SOURCE_CHANGE since it's
      // the only one we're subscribed to.
      DCHECK_EQ(event.type,
                static_cast<unsigned int>(V4L2_EVENT_SOURCE_CHANGE));
      DCHECK(event.u.src_change.changes & V4L2_EVENT_SRC_CH_RESOLUTION);

      std::move(resolution_change_callback).Run();
    }
    return;
  }
  if (other_events & POLLIN) {
    // Somebody woke us up because they didn't want us waiting on |device_fd|.
    // Do nothing.
    return;
  }

  // This could mean that |device_fd| has become invalid (closed, maybe);
  // there's little we can do here.
  // TODO(mcasas): Use the error callback to be added.
  CHECK((events_from_device & (POLLERR | POLLHUP | POLLNVAL)) ||
        (other_events & (POLLERR | POLLHUP | POLLNVAL)));
  VLOG(2) << "Unhandled |events_from_device|: 0x" << std::hex
          << events_from_device << ", or |other_events|: 0x" << other_events;
}
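
// For illustration: the infinite poll() above is broken by writing to the
// eventfd passed as |wake_event|, which is what the V4L2StatefulVideoDecoder
// destructor does. A minimal sketch:
//   const uint64_t buf = 1;
//   HANDLE_EINTR(write(wake_event, &buf, sizeof(buf)));
// eventfd(2) requires writes of exactly 8 bytes; the counter accumulates until
// read, so a single write suffices to make poll() return POLLIN.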

// Lifted from the similarly named method in platform/drm-tests [1].
// ITU-T H.264 7.4.1.2.4 implementation. Assumes non-interlaced.
// [1] https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/drm-tests/bitstreams/bitstream_helper_h264.c;l=72-104;drc=a094a84679084106598763d0a551ef33a9ad422b
bool IsNewH264Frame(const media::H264SPS* sps,
                    const media::H264PPS* pps,
                    const media::H264SliceHeader* prev_slice_header,
                    const media::H264SliceHeader* curr_slice_header) {
  if (curr_slice_header->frame_num != prev_slice_header->frame_num ||
      curr_slice_header->pic_parameter_set_id != pps->pic_parameter_set_id ||
      curr_slice_header->nal_ref_idc != prev_slice_header->nal_ref_idc ||
      curr_slice_header->idr_pic_flag != prev_slice_header->idr_pic_flag ||
      (curr_slice_header->idr_pic_flag &&
       (curr_slice_header->idr_pic_id != prev_slice_header->idr_pic_id ||
        curr_slice_header->first_mb_in_slice == 0))) {
    return true;
  }

  if (sps->pic_order_cnt_type == 0) {
    if (curr_slice_header->pic_order_cnt_lsb !=
            prev_slice_header->pic_order_cnt_lsb ||
        curr_slice_header->delta_pic_order_cnt_bottom !=
            prev_slice_header->delta_pic_order_cnt_bottom) {
      return true;
    }
  } else if (sps->pic_order_cnt_type == 1) {
    if (curr_slice_header->delta_pic_order_cnt0 !=
            prev_slice_header->delta_pic_order_cnt0 ||
        curr_slice_header->delta_pic_order_cnt1 !=
            prev_slice_header->delta_pic_order_cnt1) {
      return true;
    }
  }

  return false;
}
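
// For example, under the rules above, two consecutive IDR slices with the same
// |idr_pic_id| still start separate frames when the second one has
// first_mb_in_slice == 0 (back-to-back IDR pictures), whereas two slices
// differing only in a nonzero first_mb_in_slice belong to the same frame.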

// Concatenates |fragments| into a larger DecoderBuffer and empties |fragments|.
scoped_refptr<media::DecoderBuffer> ReassembleFragments(
    std::vector<scoped_refptr<media::DecoderBuffer>>& fragments) {
  size_t frame_size = 0;
  for (const auto& fragment : fragments) {
    frame_size += fragment->size();
  }
  auto temp_buffer = base::HeapArray<uint8_t>::Uninit(frame_size);
  uint8_t* dst = temp_buffer.data();
  for (const auto& fragment : fragments) {
    memcpy(dst, fragment->data(), fragment->size());
    dst += fragment->size();
  }

  auto reassembled_frame =
      media::DecoderBuffer::FromArray(std::move(temp_buffer));
  // Use the last fragment's timestamp as |reassembled_frame|'s timestamp.
  reassembled_frame->set_timestamp(fragments.back()->timestamp());

  fragments.clear();
  return reassembled_frame;
}

}  // namespace

namespace media {

// Stateful drivers need to be passed whole frames (see IsNewH264Frame()
// above). Some implementations (Hana MTK8173, but not Trogdor SC7180) don't
// support multiple whole frames enqueued in a single OUTPUT queue buffer. This
// class helps process, slice, and gather DecoderBuffers into full frames.
class H264FrameReassembler {
 public:
  H264FrameReassembler() = default;
  ~H264FrameReassembler() = default;
  // Not copyable, not movable (move ctors will be implicitly deleted).
  H264FrameReassembler(const H264FrameReassembler&) = delete;
  H264FrameReassembler& operator=(const H264FrameReassembler&) = delete;

  // This method parses |buffer| and decides whether it's part of a frame,
  // marks the beginning of a new frame, is a full frame itself, or contains
  // multiple frames. In any case, it might return a vector of
  // DecoderBuffer + DecodeCB pairs; if so, the caller can treat those as
  // ready to be enqueued in the driver: this method will hold onto and
  // reassemble fragments as needed. |decode_cb| will be called internally to
  // signal errors or correctly received |buffer|s.
  std::vector<std::pair<scoped_refptr<DecoderBuffer>, VideoDecoder::DecodeCB>>
  Process(scoped_refptr<DecoderBuffer> buffer,
          VideoDecoder::DecodeCB decode_cb);

  // Used for End-of-Stream situations, when a caller needs to reassemble
  // explicitly (an EOS marks a frame boundary, but we can't parse it).
  scoped_refptr<DecoderBuffer> AssembleAndFlushFragments() {
    return ReassembleFragments(frame_fragments_);
  }
  bool HasFragments() const { return !frame_fragments_.empty(); }

 private:
  // Data structure returned by FindH264FrameBoundary().
  struct FrameBoundaryInfo {
    // True if the NALU immediately before the boundary is a whole frame, e.g.
    // an SPS, PPS, EOSeq or SEIMessage.
    bool is_whole_frame;
    // True if the NALU marks the beginning of a new frame (but itself isn't
    // necessarily a whole frame, for that see |is_whole_frame|). This implies
    // that any previously buffered fragments/slices can be reassembled into a
    // whole frame.
    bool is_start_of_new_frame;
    // Size in bytes of the NALU under analysis.
    off_t nalu_size;
  };
  // Parses |data| and returns either std::nullopt (if parsing |data| fails)
  // or a FrameBoundaryInfo describing the first |nalu_size| bytes of |data|.
  //
  // It is assumed that |data| contains an integer number of NALUs.
  std::optional<struct FrameBoundaryInfo> FindH264FrameBoundary(
      const uint8_t* const data,
      size_t size);

  H264Parser h264_parser_;
  static constexpr int kInvalidSPS = -1;
  int sps_id_ = kInvalidSPS;
  static constexpr int kInvalidPPS = -1;
  int pps_id_ = kInvalidPPS;
  std::unique_ptr<H264SliceHeader> previous_slice_header_;
  std::vector<scoped_refptr<DecoderBuffer>> frame_fragments_;
};
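
// Usage sketch for H264FrameReassembler (mirroring what Decode() does below):
// feed every incoming encoded buffer to Process() and enqueue whatever
// complete frames it hands back, e.g.:
//   auto whole_frames =
//       reassembler->Process(std::move(buffer), std::move(decode_cb));
//   for (auto& buffer_and_cb : whole_frames)
//     decoder_buffer_and_callbacks_.push(std::move(buffer_and_cb));
// On a flush, call AssembleAndFlushFragments() first if HasFragments(), since
// an end_of_stream() buffer carries no NALUs to parse.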

// static
base::AtomicRefCount V4L2StatefulVideoDecoder::num_decoder_instances_(0);

// static
std::unique_ptr<VideoDecoderMixin> V4L2StatefulVideoDecoder::Create(
    std::unique_ptr<MediaLog> media_log,
    scoped_refptr<base::SequencedTaskRunner> task_runner,
    base::WeakPtr<VideoDecoderMixin::Client> client) {
  DCHECK(task_runner->RunsTasksInCurrentSequence());
  DCHECK(client);

  return base::WrapUnique<VideoDecoderMixin>(new V4L2StatefulVideoDecoder(
      std::move(media_log), std::move(task_runner), std::move(client)));
}

void V4L2StatefulVideoDecoder::Initialize(const VideoDecoderConfig& config,
                                          bool /*low_delay*/,
                                          CdmContext* cdm_context,
                                          InitCB init_cb,
                                          const PipelineOutputCB& output_cb,
                                          const WaitingCB& /*waiting_cb*/) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(config.IsValidConfig());
  DVLOGF(1) << config.AsHumanReadableString();

  if (config.is_encrypted() || !!cdm_context) {
    std::move(init_cb).Run(DecoderStatus::Codes::kUnsupportedEncryptionMode);
    return;
  }

  // Verify there's still room for more decoders before querying whether
  // |config| is supported, because some drivers (e.g. Qualcomm Venus on
  // SC7180) would not allow opening the device fd, and we'd mistake that for
  // an error.
  static const auto decoder_instances_limit =
      V4L2StatefulVideoDecoder::GetMaxNumDecoderInstances();
  const bool can_create_decoder =
      num_decoder_instances_.Increment() < decoder_instances_limit;
  if (!can_create_decoder) {
    num_decoder_instances_.Decrement();
    LOG(ERROR) << "Too many decoder instances, max=" << decoder_instances_limit;
    std::move(init_cb).Run(DecoderStatus::Codes::kTooManyDecoders);
    return;
  }

  if (supported_configs_.empty()) {
    supported_configs_ = GetSupportedV4L2DecoderConfigs().value_or(
        SupportedVideoDecoderConfigs());
    DCHECK(!supported_configs_.empty());
  }
  // Make sure that the |config| requested is supported by the driver,
  // which must provide such information.
  if (!IsVideoDecoderConfigSupported(supported_configs_, config)) {
    VLOGF(1) << "Video configuration is not supported: "
             << config.AsHumanReadableString();
    MEDIA_LOG(INFO, media_log_) << "Video configuration is not supported: "
                                << config.AsHumanReadableString();
    std::move(init_cb).Run(DecoderStatus::Codes::kUnsupportedConfig);
    return;
  }

  if (!device_fd_.is_valid()) {
    constexpr char kVideoDeviceDriverPath[] = "/dev/video-dec0";
    device_fd_.reset(HANDLE_EINTR(
        open(kVideoDeviceDriverPath, O_RDWR | O_NONBLOCK | O_CLOEXEC)));
    if (!device_fd_.is_valid()) {
      std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
      return;
    }
    wake_event_.reset(eventfd(/*initval=*/0, EFD_NONBLOCK | EFD_CLOEXEC));
    if (!wake_event_.is_valid()) {
      PLOG(ERROR) << "Failed to create an eventfd.";
      std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
      return;
    }

    struct v4l2_capability caps = {};
    if (HandledIoctl(device_fd_.get(), VIDIOC_QUERYCAP, &caps) != kIoctlOk) {
      PLOG(ERROR) << "Failed querying caps";
      std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
      return;
    }

    is_mtk8173_ = base::Contains(
        std::string(reinterpret_cast<const char*>(caps.card)), "8173");
    DVLOGF_IF(1, is_mtk8173_) << "This is an MTK8173 device (Hana, Oak)";
  }

  if (IsInitialized()) {
    // Almost always we'll be here when the MSE feeding the HTML <video> changes
    // tracks; this is implemented via a flush (a Decode() call with an
    // end_of_stream() DecoderBuffer) and then this very Initialize() call.
    // Technically, a V4L2 Memory-to-Memory stateful decoder can start decoding
    // after a flush ("Drain" in the V4L2 documentation) via either a START
    // command or sending a VIDIOC_STREAMOFF - VIDIOC_STREAMON to either queue
    // [1]. The START command is what we issue when seeing the LAST dequeued
    // CAPTURE buffer, but this is not enough for Hana MTK8173, so we issue a
    // full stream off here (see crbug.com/270039 for historical context).
    // [1] https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/dev-decoder.html#drain
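    // For reference, the START command mentioned above boils down to roughly
    // (a sketch of what V4L2Queue::SendStartCommand() issues):
    //   struct v4l2_decoder_cmd cmd = {.cmd = V4L2_DEC_CMD_START};
    //   HandledIoctl(fd, VIDIOC_DECODER_CMD, &cmd);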

    // There should be no pending work.
    DCHECK(decoder_buffer_and_callbacks_.empty());

    // Invalidate pointers from and cancel all hypothetical in-flight requests
    // to the WaitOnceForEvents() routine.
    weak_ptr_factory_for_events_.InvalidateWeakPtrs();
    weak_ptr_factory_for_CAPTURE_availability_.InvalidateWeakPtrs();
    cancelable_task_tracker_.TryCancelAll();
    encoding_timestamps_.clear();

    if (OUTPUT_queue_ && !OUTPUT_queue_->Streamoff()) {
      LOG(ERROR) << "Failed to stop (VIDIOC_STREAMOFF) |OUTPUT_queue_|.";
    }
    if (CAPTURE_queue_ && !CAPTURE_queue_->Streamoff()) {
      LOG(ERROR) << "Failed to stop (VIDIOC_STREAMOFF) |CAPTURE_queue_|.";
    }
  }

  framerate_control_ = std::make_unique<V4L2FrameRateControl>(
      base::BindRepeating(&HandledIoctl, device_fd_.get()),
      base::SequencedTaskRunner::GetCurrentDefault());

  // At this point we initialize the |OUTPUT_queue_| only, following
  // instructions in e.g. [1]. The decoded video frames queue configuration
  // must wait until there are enough encoded chunks fed into said
  // |OUTPUT_queue_| for the driver to know the output details. The driver will
  // let us know that moment via a V4L2_EVENT_SOURCE_CHANGE.
  // [1] https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/dev-decoder.html#initialization
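  // Roughly, the spec's initialization sequence maps onto this code as:
  //   1. VIDIOC_S_FMT + VIDIOC_REQBUFS + VIDIOC_STREAMON on |OUTPUT_queue_|
  //      (SetFormat(), AllocateBuffers() and Streamon() below);
  //   2. feed encoded chunks and wait for a V4L2_EVENT_SOURCE_CHANGE;
  //   3. configure and start the |CAPTURE_queue_| (InitializeCAPTUREQueue()).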
  OUTPUT_queue_ = base::WrapRefCounted(new V4L2Queue(
      base::BindRepeating(&HandledIoctl, device_fd_.get()),
      /*schedule_poll_cb=*/base::DoNothing(),
      /*mmap_cb=*/base::BindRepeating(&Mmap, device_fd_.get()),
      AllocateSecureBufferAsCallback(), V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE,
      /*destroy_cb=*/base::DoNothing()));

  const auto profile_as_v4l2_fourcc =
      VideoCodecProfileToV4L2PixFmt(config.profile(), /*slice_based=*/false);

  // Allocate larger |OUTPUT_queue_| buffers for resolutions above 1080p.
  // TODO(hnt): Investigate ways to reduce this size.
  constexpr size_t kMiB = 1024 * 1024;
  constexpr int kFullHDNumPixels = 1920 * 1080;
  const size_t kInputBufferInMBs =
      (config.coded_size().GetArea() <= kFullHDNumPixels) ? 2 : 4;
  const auto v4l2_format = OUTPUT_queue_->SetFormat(
      profile_as_v4l2_fourcc, gfx::Size(), kInputBufferInMBs * kMiB);
  if (!v4l2_format) {
    std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
    return;
  }
  DCHECK_EQ(v4l2_format->fmt.pix_mp.pixelformat, profile_as_v4l2_fourcc);

  const bool is_h264 =
      VideoCodecProfileToVideoCodec(config.profile()) == VideoCodec::kH264;
  constexpr size_t kNumInputBuffersH264 = 16;
  constexpr size_t kNumInputBuffersVPx = 2;
  const auto num_input_buffers =
      is_h264 ? kNumInputBuffersH264 : kNumInputBuffersVPx;
  if (OUTPUT_queue_->AllocateBuffers(num_input_buffers, V4L2_MEMORY_MMAP,
                                     /*incoherent=*/false) <
      num_input_buffers) {
    std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
    return;
  }
  if (!OUTPUT_queue_->Streamon()) {
    std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
    return;
  }
  client_->NotifyEstimatedMaxDecodeRequests(base::checked_cast<int>(
      std::min(static_cast<size_t>(4), num_input_buffers)));

  // Subscribe to the resolution change event. This is needed for resolution
  // changes mid-stream, but also to initialize the |CAPTURE_queue_|.
  struct v4l2_event_subscription sub = {.type = V4L2_EVENT_SOURCE_CHANGE};
  if (HandledIoctl(device_fd_.get(), VIDIOC_SUBSCRIBE_EVENT, &sub) !=
      kIoctlOk) {
    PLOG(ERROR) << "Failed to subscribe to V4L2_EVENT_SOURCE_CHANGE";
    std::move(init_cb).Run(DecoderStatus::Codes::kFailedToCreateDecoder);
    return;
  }

  config_ = config;
  output_cb_ = std::move(output_cb);
  if (is_h264) {
    h264_frame_reassembler_ = std::make_unique<H264FrameReassembler>();
  }

  std::move(init_cb).Run(DecoderStatus::Codes::kOk);
}

void V4L2StatefulVideoDecoder::Decode(scoped_refptr<DecoderBuffer> buffer,
                                      DecodeCB decode_cb) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  VLOGF(3) << buffer->AsHumanReadableString(/*verbose=*/false);
  if (!IsInitialized()) {
    Initialize(config_, /*low_delay=*/false, /*cdm_context=*/nullptr,
               /*init_cb=*/base::DoNothing(), output_cb_,
               /*waiting_cb=*/base::DoNothing());
  }

  if (buffer->end_of_stream()) {
    if (!event_task_runner_) {
      // Receiving a Flush before any "normal" Decode() calls. This is a bit
      // of a contrived situation, but possible nonetheless, and also a test
      // case.
      std::move(decode_cb).Run(DecoderStatus::Codes::kOk);
      return;
    }

    if (h264_frame_reassembler_ && h264_frame_reassembler_->HasFragments()) {
      decoder_buffer_and_callbacks_.emplace(
          h264_frame_reassembler_->AssembleAndFlushFragments(),
          base::DoNothing());
      TryAndEnqueueOUTPUTQueueBuffers();
    }

    const bool is_pending_work = !decoder_buffer_and_callbacks_.empty();
    const bool decoding = !!CAPTURE_queue_;
    if (is_pending_work || !decoding) {
      // We still have |buffer|s that haven't been enqueued in |OUTPUT_queue_|,
      // or we're not decoding yet; if we were to SendStopCommand(), they would
      // not be processed. So let's store the end_of_stream() |buffer| for
      // later processing.
      decoder_buffer_and_callbacks_.emplace(std::move(buffer),
                                            std::move(decode_cb));
      return;
    }

    if (!OUTPUT_queue_->SendStopCommand()) {
      std::move(decode_cb).Run(DecoderStatus::Codes::kFailed);
      return;
    }

    RearmCAPTUREQueueMonitoring();
    flush_cb_ = std::move(decode_cb);
    return;
  }

  PrintAndTraceQueueStates(FROM_HERE);

  if (VideoCodecProfileToVideoCodec(config_.profile()) == VideoCodec::kH264) {
    auto processed_buffer_and_decode_cbs = h264_frame_reassembler_->Process(
        std::move(buffer), std::move(decode_cb));
    // If Process() returns nothing, then it swallowed its arguments and
    // there's nothing further to do. Otherwise, just treat whatever it
    // returned as a normal sequence of DecoderBuffer + DecodeCB.
    if (processed_buffer_and_decode_cbs.empty()) {
      return;
    }
    for (auto& buffer_and_cb : processed_buffer_and_decode_cbs) {
      decoder_buffer_and_callbacks_.push(std::move(buffer_and_cb));
    }

  } else if (VideoCodecProfileToVideoCodec(config_.profile()) ==
             VideoCodec::kHEVC) {
    NOTIMPLEMENTED();
    std::move(decode_cb).Run(DecoderStatus::Codes::kUnsupportedCodec);
    return;
  } else {
    decoder_buffer_and_callbacks_.emplace(std::move(buffer),
                                          std::move(decode_cb));
  }

  if (!TryAndEnqueueOUTPUTQueueBuffers()) {
    // All accepted entries in |decoder_buffer_and_callbacks_| must have had
    // their |decode_cb|s Run() from inside TryAndEnqueueOUTPUTQueueBuffers().
    return;
  }

  if (!event_task_runner_) {
    CHECK(!CAPTURE_queue_);  // It's the first configuration event.
    // |event_task_runner_| will block on OS resources, so it has to be a
    // dedicated SingleThreadTaskRunner instead of a SequencedTaskRunner, to
    // avoid interfering with other runners of the pool.
    event_task_runner_ = base::ThreadPool::CreateSingleThreadTaskRunner(
        {base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
        base::SingleThreadTaskRunnerThreadMode::DEDICATED);
    CHECK(event_task_runner_);
  }
  RearmCAPTUREQueueMonitoring();
}

void V4L2StatefulVideoDecoder::Reset(base::OnceClosure closure) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DVLOGF(2);

  // In order to preserve the order of the callbacks between Decode() and
  // Reset(), we also trampoline |closure|.
  absl::Cleanup scoped_trampoline_reset = [closure =
                                               std::move(closure)]() mutable {
    base::SequencedTaskRunner::GetCurrentDefault()->PostTask(
        FROM_HERE, std::move(closure));
  };

  // Invalidate pointers from and cancel all hypothetical in-flight requests
  // to the WaitOnceForEvents() routine.
  weak_ptr_factory_for_events_.InvalidateWeakPtrs();
  weak_ptr_factory_for_CAPTURE_availability_.InvalidateWeakPtrs();
  cancelable_task_tracker_.TryCancelAll();

  if (h264_frame_reassembler_) {
    h264_frame_reassembler_ = std::make_unique<H264FrameReassembler>();
  }

  // Signal any pending work as kAborted.
  while (!decoder_buffer_and_callbacks_.empty()) {
    auto media_decode_cb =
        std::move(decoder_buffer_and_callbacks_.front().second);
    decoder_buffer_and_callbacks_.pop();
    std::move(media_decode_cb).Run(DecoderStatus::Codes::kAborted);
  }

  OUTPUT_queue_.reset();
  CAPTURE_queue_.reset();
  device_fd_.reset();

  event_task_runner_.reset();
  num_decoder_instances_.Decrement();
  encoding_timestamps_.clear();

  if (flush_cb_) {
    std::move(flush_cb_).Run(DecoderStatus::Codes::kAborted);
  }
}

bool V4L2StatefulVideoDecoder::NeedsBitstreamConversion() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTREACHED_IN_MIGRATION()
      << "Our only owner VideoDecoderPipeline never calls here";
  return false;
}

bool V4L2StatefulVideoDecoder::CanReadWithoutStalling() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTREACHED_IN_MIGRATION()
      << "Our only owner VideoDecoderPipeline never calls here";
  return true;
}

int V4L2StatefulVideoDecoder::GetMaxDecodeRequests() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTREACHED_IN_MIGRATION()
      << "Our only owner VideoDecoderPipeline never calls here";
  return 4;
}

VideoDecoderType V4L2StatefulVideoDecoder::GetDecoderType() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTREACHED_IN_MIGRATION()
      << "Our only owner VideoDecoderPipeline never calls here";
  return VideoDecoderType::kV4L2;
}

bool V4L2StatefulVideoDecoder::IsPlatformDecoder() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTREACHED_IN_MIGRATION()
      << "Our only owner VideoDecoderPipeline never calls here";
  return true;
}

void V4L2StatefulVideoDecoder::ApplyResolutionChange() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DVLOGF(2);
  // It's possible that we have been Reset()ed in the interval between receiving
  // the resolution change event in WaitOnceForEvents() (in a background thread)
  // and arriving here from our |client_|. Check if that's the case.
  if (IsInitialized())
    InitializeCAPTUREQueue();
}

size_t V4L2StatefulVideoDecoder::GetMaxOutputFramePoolSize() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  // VIDEO_MAX_FRAME is used as a size in V4L2 decoder drivers like Qualcomm
  // Venus. We should not exceed this limit for the frame pool that the decoder
  // writes into.
  return VIDEO_MAX_FRAME;
}

void V4L2StatefulVideoDecoder::SetDmaIncoherentV4L2(bool incoherent) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  NOTIMPLEMENTED();
}

V4L2StatefulVideoDecoder::V4L2StatefulVideoDecoder(
    std::unique_ptr<MediaLog> media_log,
    scoped_refptr<base::SequencedTaskRunner> task_runner,
    base::WeakPtr<VideoDecoderMixin::Client> client)
    : VideoDecoderMixin(std::move(media_log),
                        std::move(task_runner),
                        std::move(client)),
      weak_ptr_factory_for_events_(this),
      weak_ptr_factory_for_CAPTURE_availability_(this) {
  DCHECK(decoder_task_runner_->RunsTasksInCurrentSequence());
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DVLOGF(1);
}

V4L2StatefulVideoDecoder::~V4L2StatefulVideoDecoder() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DVLOGF(1);

  weak_ptr_factory_for_events_.InvalidateWeakPtrs();
  weak_ptr_factory_for_CAPTURE_availability_.InvalidateWeakPtrs();
  // Not strictly needed, but good to be explicit.
  cancelable_task_tracker_.TryCancelAll();

  if (wake_event_.is_valid()) {
    const uint64_t buf = 1;
    const auto res = HANDLE_EINTR(write(wake_event_.get(), &buf, sizeof(buf)));
    PLOG_IF(ERROR, res < 0) << "Error writing to |wake_event_|";
  }

  CAPTURE_queue_.reset();
  OUTPUT_queue_.reset();
  num_decoder_instances_.Decrement();

  if (event_task_runner_) {
    // Destroy the two ScopedFDs on |event_task_runner_| (hence the PostTask
    // business instead of DeleteSoon) for proper teardown threading. This
    // must be the last operation in the destructor, and come after having
    // explicitly destroyed other objects that might use |device_fd_|.
    event_task_runner_->PostTask(
        FROM_HERE,
        base::BindOnce([](base::ScopedFD fd) {}, std::move(device_fd_)));
    event_task_runner_->PostTask(
        FROM_HERE,
        base::BindOnce([](base::ScopedFD fd) {}, std::move(wake_event_)));
  }
}

bool V4L2StatefulVideoDecoder::InitializeCAPTUREQueue() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsInitialized()) << "V4L2StatefulVideoDecoder must be Initialize()d";

  CAPTURE_queue_ = base::WrapRefCounted(new V4L2Queue(
      base::BindRepeating(&HandledIoctl, device_fd_.get()),
      /*schedule_poll_cb=*/base::DoNothing(),
      /*mmap_cb=*/base::BindRepeating(&Mmap, device_fd_.get()),
      AllocateSecureBufferAsCallback(), V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
      /*destroy_cb=*/base::DoNothing()));

  const auto v4l2_format_or_error = CAPTURE_queue_->GetFormat();
  if (!v4l2_format_or_error.first || v4l2_format_or_error.second != kIoctlOk) {
    return false;
  }
  const struct v4l2_format v4l2_format = *(v4l2_format_or_error.first);
  VLOG(3) << "Out-of-the-box |CAPTURE_queue_| configuration: "
          << V4L2FormatToString(v4l2_format);

  const gfx::Size coded_size(v4l2_format.fmt.pix_mp.width,
                             v4l2_format.fmt.pix_mp.height);
  std::vector<ImageProcessor::PixelLayoutCandidate> candidates =
      EnumeratePixelLayoutCandidates(coded_size);

  // |visible_rect| is a subset of |coded_size| and represents the "natural"
  // size of the video: e.g. a 1080p sequence could have a 1920x1080 "natural"
  // size, or |visible_rect|, but a |coded_size| of 1920x1088 because of codec
  // block alignment to 16 samples.
  std::optional<gfx::Rect> visible_rect = CAPTURE_queue_->GetVisibleRect();
  if (!visible_rect) {
    return false;
  }
  CHECK(gfx::Rect(coded_size).Contains(*visible_rect));
  visible_rect_ = *visible_rect;

  const auto num_codec_reference_frames = GetNumberOfReferenceFrames();

  // Ask the pipeline to pick the output format from |CAPTURE_queue_|'s
  // |candidates|. If needed, it will try to instantiate an ImageProcessor.
  CroStatus::Or<ImageProcessor::PixelLayoutCandidate> status_or_output_format =
      client_->PickDecoderOutputFormat(
          candidates, *visible_rect,
          config_.aspect_ratio().GetNaturalSize(*visible_rect),
          /*output_size=*/std::nullopt, num_codec_reference_frames,
          /*use_protected=*/false, /*need_aux_frame_pool=*/false,
          /*allocator=*/std::nullopt);
  if (!status_or_output_format.has_value()) {
    return false;
  }

  const ImageProcessor::PixelLayoutCandidate output_format =
      std::move(status_or_output_format).value();
  auto chosen_fourcc = output_format.fourcc;
  const auto chosen_size = output_format.size;
  const auto chosen_modifier = output_format.modifier;

  // If our |client_| has a VideoFramePool to allocate buffers for us, we'll
  // use it; otherwise we have to ask the driver.
  const bool use_v4l2_allocated_buffers = !client_->GetVideoFramePool();

  const v4l2_memory buffer_type =
      use_v4l2_allocated_buffers ? V4L2_MEMORY_MMAP : V4L2_MEMORY_DMABUF;
  // If we don't |use_v4l2_allocated_buffers|, request as many as possible
  // (VIDEO_MAX_FRAME) since they are shallow allocations. Otherwise, allocate
  // |num_codec_reference_frames|, plus one for the video frame being decoded,
  // and one for our client (presumably |client_|'s ImageProcessor).
  const size_t v4l2_num_buffers = use_v4l2_allocated_buffers
                                      ? num_codec_reference_frames + 2
                                      : VIDEO_MAX_FRAME;

  if (!use_v4l2_allocated_buffers) {
    std::optional<GpuBufferLayout> layout =
        client_->GetVideoFramePool()->GetGpuBufferLayout();
    if (!layout.has_value()) {
      return false;
    }
    if (layout->modifier() == DRM_FORMAT_MOD_QCOM_COMPRESSED) {
      // V4L2 has no API to set DRM modifiers; instead we translate here to
      // the corresponding V4L2 pixel format.
      if (!CAPTURE_queue_
              ->SetFormat(V4L2_PIX_FMT_QC08C, chosen_size, /*buffer_size=*/0)
              .has_value()) {
        return false;
      }
      chosen_fourcc = Fourcc::FromV4L2PixFmt(V4L2_PIX_FMT_QC08C).value();
    }
  }
  VLOG(2) << "Chosen |CAPTURE_queue_| format: " << chosen_fourcc.ToString()
          << " " << chosen_size.ToString() << " (modifier: 0x" << std::hex
          << chosen_modifier << std::dec << "). Using " << v4l2_num_buffers
          << " |CAPTURE_queue_| slots.";

  const auto allocated_buffers = CAPTURE_queue_->AllocateBuffers(
      v4l2_num_buffers, buffer_type, /*incoherent=*/false);
  if (allocated_buffers < v4l2_num_buffers) {
    LOGF(ERROR) << "Failed to allocate enough CAPTURE buffers, requested= "
                << v4l2_num_buffers << " actual= " << allocated_buffers;
    return false;
  }
  if (!CAPTURE_queue_->Streamon()) {
    return false;
  }

  // We need to "enqueue" allocated buffers in the driver in order to use them.
  TryAndEnqueueCAPTUREQueueBuffers();

  TryAndEnqueueOUTPUTQueueBuffers();

  RearmCAPTUREQueueMonitoring();

  return true;
}

std::vector<ImageProcessor::PixelLayoutCandidate>
V4L2StatefulVideoDecoder::EnumeratePixelLayoutCandidates(
    const gfx::Size& coded_size) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(CAPTURE_queue_) << "|CAPTURE_queue_| must be created at this point";

  const auto v4l2_pix_fmts = EnumerateSupportedPixFmts(
      base::BindRepeating(&HandledIoctl, device_fd_.get()),
      V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);

  std::vector<ImageProcessor::PixelLayoutCandidate> candidates;
  for (const uint32_t& pixfmt : v4l2_pix_fmts) {
    const auto candidate_fourcc = Fourcc::FromV4L2PixFmt(pixfmt);
    if (!candidate_fourcc) {
      continue;  // This is fine: means we don't recognize |candidate_fourcc|.
    }

    // TODO(mcasas): Consider what to do when the input bitstream is of higher
    // bit depth: Some drivers (QC?) will support and enumerate both a high bit
    // depth and a low bit depth pixel formats. We'd like to choose the higher
    // bit depth and let Chrome's display pipeline decide what to do.

    candidates.emplace_back(ImageProcessor::PixelLayoutCandidate{
        .fourcc = *candidate_fourcc, .size = coded_size});
    VLOG(2) << "CAPTURE queue candidate format: "
            << candidate_fourcc->ToString() << ", " << coded_size.ToString();
  }
  return candidates;
}

size_t V4L2StatefulVideoDecoder::GetNumberOfReferenceFrames() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(CAPTURE_queue_) << "|CAPTURE_queue_| must be created at this point";

  // Estimate the number of buffers needed for the |CAPTURE_queue_| and for
  // codec reference requirements. For VP9 and AV1, the maximum number of
  // reference frames is a constant 8 (for VP8 it's 4); for H.264 and other
  // ITU-T codecs, it depends on the bitstream. Here we query the driver
  // anyway.
  constexpr size_t kDefaultNumReferenceFrames = 8;
  constexpr size_t kDefaultNumReferenceFramesMTK8173 = 16;
  size_t num_codec_reference_frames = is_mtk8173_
                                          ? kDefaultNumReferenceFramesMTK8173
                                          : kDefaultNumReferenceFrames;

  struct v4l2_ext_control ctrl = {.id = V4L2_CID_MIN_BUFFERS_FOR_CAPTURE};
  struct v4l2_ext_controls ext_ctrls = {.count = 1, .controls = &ctrl};
  if (HandledIoctl(device_fd_.get(), VIDIOC_G_EXT_CTRLS, &ext_ctrls) ==
      kIoctlOk) {
    num_codec_reference_frames = std::max(
        base::checked_cast<size_t>(ctrl.value), num_codec_reference_frames);
  }
  VLOG(2) << "Driver wants: " << ctrl.value
          << " CAPTURE buffers. We'll use: " << num_codec_reference_frames;

  // Verify |num_codec_reference_frames| has a reasonable value. Anecdotally,
  // 18 is the largest number of reference frames seen, on some ITU-T H.264
  // test vectors (e.g. CABA1_SVA_B.h264).
  CHECK_LE(num_codec_reference_frames, 18u);

  return num_codec_reference_frames;
}

void V4L2StatefulVideoDecoder::RearmCAPTUREQueueMonitoring() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  auto dequeue_callback = base::BindPostTaskToCurrentDefault(base::BindOnce(
      &V4L2StatefulVideoDecoder::TryAndDequeueCAPTUREQueueBuffers,
      weak_ptr_factory_for_events_.GetWeakPtr()));
  // |client_| needs to be told of a hypothetical resolution change (to wait
  // for frames in flight, etc.). Once that's done, it will ping us back via
  // ApplyResolutionChange(). We use a trampoline lambda to make sure
  // |weak_ptr_factory_for_events_|'s pointers have not been invalidated (e.g.
  // by a Reset()).
  auto resolution_change_callback =
      base::BindPostTaskToCurrentDefault(base::BindOnce(
          [](base::WeakPtr<VideoDecoderMixin::Client> client,
             base::WeakPtr<V4L2StatefulVideoDecoder> weak_this) {
            if (weak_this && client) {
              client->PrepareChangeResolution();
            }
          },
          client_, weak_ptr_factory_for_events_.GetWeakPtr()));

  // Here we launch a single "wait for a |CAPTURE_queue_| event" monitoring
  // Task (via an infinite-wait POSIX poll()). It lives on a background
  // SingleThreadTaskRunner whose lifetime we don't control (it comes from the
  // thread pool), so it can outlive this class -- this is fine, however,
  // because upon V4L2StatefulVideoDecoder destruction:
  // - |cancelable_task_tracker_| is used to try to drop all such Tasks that
  //   have not been serviced.
  // - Any WeakPtr used for WaitOnceForEvents() callbacks will be invalidated
  //   (in particular, |client_| is a WeakPtr).
  // - A |wake_event_| is sent to break a hypothetical poll() wait;
  //   WaitOnceForEvents() should return immediately upon this happening.
  //   (|wake_event_| is needed because we cannot rely on POSIX to wake a
  //   thread that is blocked on a poll() upon the closing of an FD from a
  //   different thread, concretely the "result is unspecified").
  // - Both |device_fd_| and |wake_event_| are posted for destruction on said
  //   background SingleThreadTaskRunner so that the FDs monitored by poll() are
  //   guaranteed to stay alive until poll() returns, thus avoiding unspecified
  //   behavior.
  cancelable_task_tracker_.PostTask(
      event_task_runner_.get(), FROM_HERE,
      base::BindOnce(&WaitOnceForEvents, device_fd_.get(), wake_event_.get(),
                     std::move(dequeue_callback),
                     std::move(resolution_change_callback)));
}

void V4L2StatefulVideoDecoder::TryAndDequeueCAPTUREQueueBuffers() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(CAPTURE_queue_) << "|CAPTURE_queue_| must be created at this point";

  const v4l2_memory queue_type = CAPTURE_queue_->GetMemoryType();
  DCHECK(queue_type == V4L2_MEMORY_MMAP || queue_type == V4L2_MEMORY_DMABUF);
  const bool use_v4l2_allocated_buffers = !client_->GetVideoFramePool();
  DCHECK((queue_type == V4L2_MEMORY_MMAP && use_v4l2_allocated_buffers) ||
         (queue_type == V4L2_MEMORY_DMABUF && !use_v4l2_allocated_buffers));

  bool success;
  scoped_refptr<V4L2ReadableBuffer> dequeued_buffer;
  for (std::tie(success, dequeued_buffer) = CAPTURE_queue_->DequeueBuffer();
       success && dequeued_buffer;
       std::tie(success, dequeued_buffer) = CAPTURE_queue_->DequeueBuffer()) {
    PrintAndTraceQueueStates(FROM_HERE);

    const int64_t flat_timespec =
        TimeValToTimeDelta(dequeued_buffer->GetTimeStamp()).InMilliseconds();
    if (base::Contains(encoding_timestamps_, flat_timespec)) {
      UMA_HISTOGRAM_TIMES(
          "Media.PlatformVideoDecoding.Decode",
          base::TimeTicks::Now() - encoding_timestamps_[flat_timespec]);
      encoding_timestamps_.erase(flat_timespec);
    }
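    // (The enqueue-side timestamps matched here are recorded in
    // TryAndEnqueueOUTPUTQueueBuffers(), keyed by the buffer's flattened
    // timestamp.)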

    // A buffer marked "last" indicates the end of a flush. Note that, according
    // to spec, this buffer may or may not have zero |bytesused|.
    // https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/dev-decoder.html#drain
    if (dequeued_buffer->IsLast()) {
      VLOGF(3) << "Buffer marked LAST in |CAPTURE_queue_|";

      // Make sure the |OUTPUT_queue_| is really empty before restarting.
      if (!DrainOUTPUTQueue()) {
        LOG(ERROR) << "Failed to drain resources from |OUTPUT_queue_|.";
      }

      // According to the spec, decoding can be restarted either sending a
      // "V4L2_DEC_CMD_START - the decoder will not be reset and will resume
      //  operation normally, with all the state from before the drain," or
      // sending a VIDIOC_STREAMOFF - VIDIOC_STREAMON to either queue. Since we
      // want to keep the state (e.g. resolution, |client_| buffers), we try
      // the first option.
      if (!CAPTURE_queue_->SendStartCommand()) {
        VLOGF(3) << "Failed to resume decoding after flush";
        // TODO(mcasas): Handle this error.
      }
      // In some cases we still have enqueued work in |OUTPUT_queue_| after
      // seeing the LAST buffer. This happens at least when there's a pending
      // resolution change (see vp80-03-segmentation-1436.ivf), that according
      // to [1] must be processed first.
      // [1] https://www.kernel.org/doc/html/v5.15/userspace-api/media/v4l/dev-decoder.html#drain
      const bool has_pending_OUTPUT_queue_work =
          OUTPUT_queue_->QueuedBuffersCount();
      if (flush_cb_ && !has_pending_OUTPUT_queue_work) {
        std::move(flush_cb_).Run(DecoderStatus::Codes::kOk);
      }
      return;
    } else if (!dequeued_buffer->IsError()) {
      // IsError() doesn't flag a fatal error; it's more of a
      // discard-this-buffer marker. It is seen (seldom) from the Venus (QC)
      // driver when entering a dynamic resolution mode: the driver flushes
      // the queue with errored buffers before sending the IsLast() buffer.
      scoped_refptr<FrameResource> frame = dequeued_buffer->GetFrameResource();
      CHECK(frame);

      frame->set_timestamp(TimeValToTimeDelta(dequeued_buffer->GetTimeStamp()));

      // For a V4L2_MEMORY_MMAP |CAPTURE_queue_| we wrap |frame| to return
      // |dequeued_buffer| to |CAPTURE_queue_|, where they are "pooled". For a
      // V4L2_MEMORY_DMABUF |CAPTURE_queue_| we don't do that, because the
      // VideoFrames are pooled in |client_|'s frame pool;
      // TryAndEnqueueCAPTUREQueueBuffers() will find them there.
      if (queue_type == V4L2_MEMORY_MMAP) {
        // Don't query |CAPTURE_queue_|'s GetVisibleRect() here because it races
        // with hypothetical resolution changes.
        CHECK(gfx::Rect(frame->coded_size()).Contains(visible_rect_));
        CHECK(frame->visible_rect().Contains(visible_rect_));
        auto wrapped_frame =
            frame->CreateWrappingFrame(visible_rect_,
                                       /*natural_size=*/visible_rect_.size());

        // Make sure |dequeued_buffer| stays alive, and its reference is
        // released when |wrapped_frame| is destroyed, allowing it (maybe) to
        // get back to |CAPTURE_queue_|'s free buffers.
        wrapped_frame->AddDestructionObserver(
            base::BindPostTaskToCurrentDefault(base::BindOnce(
                [](scoped_refptr<V4L2ReadableBuffer> buffer,
                   base::WeakPtr<V4L2StatefulVideoDecoder> weak_this) {
                  // See also TryAndEnqueueCAPTUREQueueBuffers(), V4L2Queue is
                  // funny: We need to "enqueue" released buffers in the driver
                  // in order to use them (otherwise they would stay as "free").
                  if (weak_this) {
                    weak_this->TryAndEnqueueCAPTUREQueueBuffers();
                    weak_this->PrintAndTraceQueueStates(FROM_HERE);
                  }
                },
                std::move(dequeued_buffer),
                weak_ptr_factory_for_CAPTURE_availability_.GetWeakPtr())));
        CHECK(wrapped_frame);
        VLOGF(3) << wrapped_frame->AsHumanReadableString();
        output_cb_.Run(std::move(wrapped_frame));
      } else {
        DCHECK_EQ(queue_type, V4L2_MEMORY_DMABUF);
        VLOGF(3) << frame->AsHumanReadableString();
        framerate_control_->AttachToFrameResource(frame);
        output_cb_.Run(std::move(frame));
      }

      // We just dequeued one decoded |frame|; try to reclaim |OUTPUT_queue_|
      // resources that might just have been released.
      if (!DrainOUTPUTQueue()) {
        LOG(ERROR) << "Failed to drain resources from |OUTPUT_queue_|.";
      }
    }
  }
  LOG_IF(ERROR, !success) << "Failed dequeueing from |CAPTURE_queue_|";
  // Not an error if |dequeued_buffer| is empty; it just means the queue is
  // empty.

  // There might be available resources for |CAPTURE_queue_| from previous
  // cycles; try and make them available for the driver.
  TryAndEnqueueCAPTUREQueueBuffers();

  TryAndEnqueueOUTPUTQueueBuffers();

  RearmCAPTUREQueueMonitoring();
}

void V4L2StatefulVideoDecoder::TryAndEnqueueCAPTUREQueueBuffers() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(CAPTURE_queue_) << "|CAPTURE_queue_| must be created at this point";
  const v4l2_memory queue_type = CAPTURE_queue_->GetMemoryType();

  DCHECK(queue_type == V4L2_MEMORY_MMAP || queue_type == V4L2_MEMORY_DMABUF);
  // V4L2Queue is funny because even though it might have "free" buffers, the
  // user (i.e. this code) needs to "enqueue" them for the actual V4L2 queue
  // to use them.
  if (queue_type == V4L2_MEMORY_MMAP) {
    while (auto v4l2_buffer = CAPTURE_queue_->GetFreeBuffer()) {
      if (!std::move(*v4l2_buffer).QueueMMap()) {
        LOG(ERROR) << "CAPTURE queue failed to enqueue an MMAP buffer.";
        return;
      }
    }
  } else {
    while (true) {
      // When using a V4L2_MEMORY_DMABUF queue, resource ownership lies in our
      // |client_|'s frame pool, which usually has fewer resources than we
      // have allocated here (because ours are just empty queue slots and we
      // allocate conservatively). So, it's common that said frame pool gets
      // exhausted before we run out of |CAPTURE_queue_|'s free "buffers"
      // here.
      if (client_->GetVideoFramePool()->IsExhausted()) {
        // All VideoFrames are elsewhere (maybe in flight). Request a callback
        // when some of them are back.
        // This weird jump is needed because the video frame pool cannot be
        // called back (e.g. to query whether IsExhausted()) from inside the
        // NotifyWhenFrameAvailable() callback: it would deadlock.
        client_->GetVideoFramePool()->NotifyWhenFrameAvailable(base::BindOnce(
            base::IgnoreResult(&base::SequencedTaskRunner::PostTask),
            base::SequencedTaskRunner::GetCurrentDefault(), FROM_HERE,
            base::BindOnce(
                &V4L2StatefulVideoDecoder::TryAndEnqueueCAPTUREQueueBuffers,
                weak_ptr_factory_for_CAPTURE_availability_.GetWeakPtr())));
        return;
      }
      auto frame = client_->GetVideoFramePool()->GetFrame();
      CHECK(frame);

      // TODO(mcasas): Consider using GetFreeBufferForFrame().
      auto v4l2_buffer = CAPTURE_queue_->GetFreeBuffer();
      if (!v4l2_buffer) {
        VLOGF(1) << "|CAPTURE_queue_| has no buffers";
        return;
      }

      if (!std::move(*v4l2_buffer).QueueDMABuf(std::move(frame))) {
        LOG(ERROR) << "CAPTURE queue failed to enqueue a DmaBuf buffer.";
        return;
      }
    }
  }
}

bool V4L2StatefulVideoDecoder::DrainOUTPUTQueue() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsInitialized()) << "V4L2StatefulVideoDecoder must be Initialize()d";

  bool success;
  scoped_refptr<V4L2ReadableBuffer> dequeued_buffer;
  for (std::tie(success, dequeued_buffer) = OUTPUT_queue_->DequeueBuffer();
       success && dequeued_buffer;
       std::tie(success, dequeued_buffer) = OUTPUT_queue_->DequeueBuffer()) {
    PrintAndTraceQueueStates(FROM_HERE);
  }
  return success;
}

bool V4L2StatefulVideoDecoder::TryAndEnqueueOUTPUTQueueBuffers() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsInitialized()) << "V4L2StatefulVideoDecoder must be Initialize()d";

  // First try to recover some free slots in |OUTPUT_queue_|.
  if (!DrainOUTPUTQueue()) {
    PLOG(ERROR) << "Failed to drain resources from |OUTPUT_queue_|.";
    return false;
  }

  for (std::optional<V4L2WritableBufferRef> v4l2_buffer =
           OUTPUT_queue_->GetFreeBuffer();
       v4l2_buffer && !decoder_buffer_and_callbacks_.empty();
       v4l2_buffer = OUTPUT_queue_->GetFreeBuffer()) {
    PrintAndTraceQueueStates(FROM_HERE);

    auto media_buffer = std::move(decoder_buffer_and_callbacks_.front().first);
    auto media_decode_cb =
        std::move(decoder_buffer_and_callbacks_.front().second);
    decoder_buffer_and_callbacks_.pop();

    if (media_buffer->end_of_stream()) {
      // We had received an end_of_stream() buffer but there were still pending
      // |decoder_buffer_and_callbacks_|, so we stored it; we can now process it
      // and start the Flush.
      if (!OUTPUT_queue_->SendStopCommand()) {
        std::move(media_decode_cb).Run(DecoderStatus::Codes::kFailed);
        return false;
      }
      flush_cb_ = std::move(media_decode_cb);
      return true;
    }

    CHECK_EQ(v4l2_buffer->PlanesCount(), 1u);
    uint8_t* dst = static_cast<uint8_t*>(v4l2_buffer->GetPlaneMapping(0));
    CHECK_GE(v4l2_buffer->GetPlaneSize(/*plane=*/0), media_buffer->size());
    memcpy(dst, media_buffer->data(), media_buffer->size());
    v4l2_buffer->SetPlaneBytesUsed(0, media_buffer->size());
    VLOGF(4) << "Enqueuing " << media_buffer->size() << " bytes.";
    v4l2_buffer->SetTimeStamp(TimeDeltaToTimeVal(media_buffer->timestamp()));

    // Record the wall-clock enqueue time; it is matched against the dequeue
    // time in TryAndDequeueCAPTUREQueueBuffers() to report the
    // Media.PlatformVideoDecoding.Decode UMA.
    const int64_t flat_timespec = media_buffer->timestamp().InMilliseconds();
    encoding_timestamps_[flat_timespec] = base::TimeTicks::Now();

    if (!std::move(*v4l2_buffer).QueueMMap()) {
      LOG(ERROR) << "Error while queuing input |media_buffer|!";
      std::move(media_decode_cb)
          .Run(DecoderStatus::Codes::kPlatformDecodeFailure);
      return false;
    }
    std::move(media_decode_cb).Run(DecoderStatus::Codes::kOk);
  }
  return true;
}

void V4L2StatefulVideoDecoder::PrintAndTraceQueueStates(
    const base::Location& from_here) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(IsInitialized()) << "V4L2StatefulVideoDecoder must be Initialize()d";
  VLOG(4) << from_here.function_name() << "(): |OUTPUT_queue_| "
          << OUTPUT_queue_->QueuedBuffersCount() << "/"
          << OUTPUT_queue_->AllocatedBuffersCount() << ", |CAPTURE_queue_| "
          << (CAPTURE_queue_ ? CAPTURE_queue_->QueuedBuffersCount() : 0) << "/"
          << (CAPTURE_queue_ ? CAPTURE_queue_->AllocatedBuffersCount() : 0);

  TRACE_COUNTER_ID1(
      "media,gpu", "V4L2 OUTPUT Q used buffers", this,
      base::checked_cast<int32_t>(OUTPUT_queue_->QueuedBuffersCount()));
  TRACE_COUNTER_ID1("media,gpu", "V4L2 CAPTURE Q free buffers", this,
                    (CAPTURE_queue_ ? base::checked_cast<int32_t>(
                                          CAPTURE_queue_->QueuedBuffersCount())
                                    : 0));
}

bool V4L2StatefulVideoDecoder::IsInitialized() const {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  return !!OUTPUT_queue_;
}

// static
int V4L2StatefulVideoDecoder::GetMaxNumDecoderInstances() {
  if (!base::FeatureList::IsEnabled(media::kLimitConcurrentDecoderInstances)) {
    return std::numeric_limits<int>::max();
  }
  constexpr char kVideoDeviceDriverPath[] = "/dev/video-dec0";
  base::ScopedFD device_fd(HANDLE_EINTR(
      open(kVideoDeviceDriverPath, O_RDWR | O_NONBLOCK | O_CLOEXEC)));
  if (!device_fd.is_valid()) {
    return std::numeric_limits<int>::max();
  }
  struct v4l2_capability caps = {};
  if (HandledIoctl(device_fd.get(), VIDIOC_QUERYCAP, &caps) != kIoctlOk) {
    PLOG(ERROR) << "Failed querying caps";
    return std::numeric_limits<int>::max();
  }
  const bool is_mtk8173 = base::Contains(
      std::string(reinterpret_cast<const char*>(caps.card)), "8173");
  // Experimentally, MTK8173 (e.g. Hana) can initialize the driver up to 30
  // times simultaneously; however, legacy code limits this to 10 [1]. All
  // other drivers used to limit this to 32 [2], but in practice I could only
  // open up to 15 with e.g. Qualcomm SC7180.
  // [1] https://source.chromium.org/chromium/chromium/src/+/main:media/gpu/v4l2/legacy/v4l2_video_decode_accelerator.h;l=449-454;drc=83195d4d1e1a4e54f148ddc80d0edcf5daa755ff
  // [2] https://source.chromium.org/chromium/chromium/src/+/main:media/gpu/v4l2/v4l2_video_decoder.h;l=183-189;drc=90fa47c897b589bc4857fb7ccafab46a4be2e2ae
  return is_mtk8173 ? 10 : 15;
}

std::vector<std::pair<scoped_refptr<DecoderBuffer>, VideoDecoder::DecodeCB>>
H264FrameReassembler::Process(scoped_refptr<DecoderBuffer> buffer,
                              VideoDecoder::DecodeCB decode_cb) {
  std::vector<std::pair<scoped_refptr<DecoderBuffer>, VideoDecoder::DecodeCB>>
      whole_frames;

  auto remaining = base::span(*buffer);

  do {
    const auto nalu_info =
        FindH264FrameBoundary(remaining.data(), remaining.size());
    if (!nalu_info.has_value()) {
      LOG(ERROR) << "Failed parsing H.264 DecoderBuffer";
      std::move(decode_cb).Run(DecoderStatus::Codes::kFailed);
      return {};
    }
    const size_t found_nalu_size =
        base::checked_cast<size_t>(nalu_info->nalu_size);

    if (nalu_info->is_start_of_new_frame && HasFragments()) {
      VLOGF(4) << frame_fragments_.size()
               << " currently stored frame fragment(s) can be reassembled.";
      whole_frames.emplace_back(ReassembleFragments(frame_fragments_),
                                base::DoNothing());
    }

    if (nalu_info->is_whole_frame) {
      VLOGF(3) << "Found a whole frame, size=" << found_nalu_size << " bytes";
      whole_frames.emplace_back(
          DecoderBuffer::CopyFrom(remaining.first(found_nalu_size)),
          base::DoNothing());
      whole_frames.back().first->set_timestamp(buffer->timestamp());

      remaining = remaining.subspan(found_nalu_size);
      continue;
    }

    VLOGF(4) << "This was a frame fragment; storing it for later reassembly.";
    frame_fragments_.emplace_back(
        DecoderBuffer::CopyFrom(remaining.first(found_nalu_size)));
    frame_fragments_.back()->set_timestamp(buffer->timestamp());

    remaining = remaining.subspan(found_nalu_size);
  } while (!remaining.empty());

  // |decode_cb| is used to signal to our client that encoded chunks have been
  // "accepted" and that we are ready to receive more. If we have found (some)
  // whole frame(s), then we can just return |decode_cb| so that it can be
  // Run() at the moment of actual enqueueing in the driver; but if no frames
  // were found, we need to signal the callback now, otherwise the client
  // might stop sending fragments altogether and we'd wait forever.
  if (whole_frames.empty()) {
    std::move(decode_cb).Run(DecoderStatus::Codes::kOk);
  } else {
    whole_frames.back().second = std::move(decode_cb);
  }

  return whole_frames;
}
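
// For example, a single DecoderBuffer containing SPS + PPS + one IDR slice
// yields two returned entries (the SPS and PPS NALUs, each a "whole frame"
// for enqueueing purposes, the last one carrying |decode_cb|), while the IDR
// slice is stored in |frame_fragments_| until a later NALU marks a frame
// boundary.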

std::optional<struct H264FrameReassembler::FrameBoundaryInfo>
H264FrameReassembler::FindH264FrameBoundary(const uint8_t* const data,
                                            size_t data_size) {
  h264_parser_.SetStream(data, data_size);
  while (true) {
    H264NALU nalu = {};
    H264Parser::Result result = h264_parser_.AdvanceToNextNALU(&nalu);
    if (result == H264Parser::kInvalidStream ||
        result == H264Parser::kUnsupportedStream) {
      LOG(ERROR) << "Could not parse bitstream.";
      return std::nullopt;
    }
    if (result == H264Parser::kEOStream) {
      // Not an error per se, but strange to run out of data without having
      // found a new NALU boundary. Pretend it's a frame boundary and move on.
      return FrameBoundaryInfo{.is_whole_frame = true,
                               .is_start_of_new_frame = true,
                               .nalu_size = nalu.size};
    }
    DCHECK_EQ(result, H264Parser::kOk);

    static const char* kKnownNALUNames[] = {
        "Unspecified", "NonIDRSlice",   "SliceDataA",
        "SliceDataB",  "SliceDataC",    "IDRSlice",
        "SEIMessage",  "SPS",           "PPS",
        "AUD",         "EOSeq",         "EOStream",
        "Filler",      "SPSExt",        "Prefix",
        "SubsetSPS",   "DPS",           "Reserved17",
        "Reserved18",  "CodedSliceAux", "CodedSliceExtension",
    };
    constexpr auto kMaxNALUTypeValue = std::size(kKnownNALUNames);
    if (base::checked_cast<size_t>(nalu.nal_unit_type) >= kMaxNALUTypeValue) {
      LOG(ERROR) << "NALU type unknown.";
      return std::nullopt;
    }

    CHECK_GE(nalu.data, data);
    CHECK_LE(nalu.data, data + data_size);
    const auto nalu_size = nalu.data - data + nalu.size;
    VLOGF(4) << "H264NALU type " << kKnownNALUNames[nalu.nal_unit_type]
             << ", NALU size=" << nalu_size
             << " bytes, payload size=" << nalu.size << " bytes";

    switch (nalu.nal_unit_type) {
      case H264NALU::kSPS:
        result = h264_parser_.ParseSPS(&sps_id_);
        if (result != H264Parser::kOk) {
          LOG(ERROR) << "Could not parse SPS header.";
          return std::nullopt;
        }
        previous_slice_header_.reset();
        return FrameBoundaryInfo{.is_whole_frame = true,
                                 .is_start_of_new_frame = true,
                                 .nalu_size = nalu_size};
      case H264NALU::kPPS:
        result = h264_parser_.ParsePPS(&pps_id_);
        if (result != H264Parser::kOk) {
          LOG(ERROR) << "Could not parse PPS header.";
          return std::nullopt;
        }
        previous_slice_header_.reset();
        return FrameBoundaryInfo{.is_whole_frame = true,
                                 .is_start_of_new_frame = true,
                                 .nalu_size = nalu_size};
      case H264NALU::kNonIDRSlice:
      case H264NALU::kIDRSlice: {
        H264SliceHeader curr_slice_header;
        result = h264_parser_.ParseSliceHeader(nalu, &curr_slice_header);
        if (result != H264Parser::kOk) {
          // In this function we just want to find frame boundaries, so return
          // but don't mark an error.
          LOG(WARNING) << "Could not parse NALU header.";
          return FrameBoundaryInfo{.is_whole_frame = true,
                                   .is_start_of_new_frame = false,
                                   .nalu_size = nalu_size};
        }
        const bool is_new_frame =
            previous_slice_header_ &&
            IsNewH264Frame(h264_parser_.GetSPS(sps_id_),
                           h264_parser_.GetPPS(pps_id_),
                           previous_slice_header_.get(), &curr_slice_header);
        previous_slice_header_ =
            std::make_unique<H264SliceHeader>(curr_slice_header);
        return FrameBoundaryInfo{.is_whole_frame = false,
                                 .is_start_of_new_frame = is_new_frame,
                                 .nalu_size = nalu_size};
      }
      case H264NALU::kSEIMessage:
      case H264NALU::kAUD:
      case H264NALU::kEOSeq:
      case H264NALU::kEOStream:
      case H264NALU::kFiller:
      case H264NALU::kSPSExt:
      case H264NALU::kPrefix:
      case H264NALU::kSubsetSPS:
      case H264NALU::kDPS:
      case H264NALU::kReserved17:
      case H264NALU::kReserved18:
        // Anything other than SPS, PPS, and non-IDR/IDR slices marks a new
        // frame boundary.
        previous_slice_header_.reset();
        return FrameBoundaryInfo{.is_whole_frame = true,
                                 .is_start_of_new_frame = true,
                                 .nalu_size = nalu_size};
      default:
        VLOGF(4) << "Unsupported NALU " << kKnownNALUNames[nalu.nal_unit_type];
    }
  }
}

}  // namespace media