// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Decompression utility for the MT21C pixel format.
//
// Note that this file and its corresponding .cc file have some very SoC
// specific code. While we would ideally like to avoid tying code so closely
// with a specific chip, this code is in the critical path for video decoding,
// and we know that we will only ever need to run this code on the MT8173. Every
// other SoC in the MT81XX line support a pixel format called MM21, which we
// have generic support for in libyuv.
//
// We may some day decide to try using MT21C on other chips in the MT81XX line,
// but we will need to change significant sections of this code to make that
// viable. Our assumptions about the relative speed the big and little cores,
// the number of cores, the CPU IDs of the cores, the timings of the SIMD
// instructions, the availability of ARM64, etc, will all be incorrect.
#ifndef MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_
#define MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_
#include "build/build_config.h"
#if !defined(ARCH_CPU_ARM_FAMILY)
#error "MT21Decompressor is only intended to run on MT8173 (ARM)"
#endif
#if !(defined(COMPILER_GCC) || defined(__clang__))
#error "MT21Decompressor is only intended to be built with GCC or Clang"
#endif
#include <stdint.h>
#include <atomic>
#include <memory>
#include <thread>
#include <vector>
#include "base/memory/raw_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_refptr.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "ui/gfx/geometry/size.h"
namespace media {
struct GolombRiceTableEntry;
struct MT21DecompressionJob : public base::RefCounted<MT21DecompressionJob> {
MT21DecompressionJob(const uint8_t* src,
const uint8_t* footer,
size_t offset,
uint8_t* dest,
size_t width,
size_t height,
bool is_chroma);
const uint8_t* src;
const uint8_t* footer;
size_t offset;
RAW_PTR_EXCLUSION uint8_t* dest;
size_t width;
size_t height;
bool is_chroma;
base::WaitableEvent wakeup_event;
base::WaitableEvent done_event;
private:
friend class base::RefCounted<MT21DecompressionJob>;
~MT21DecompressionJob() = default;
};
// We considered making this an ImageProcessorBackend, but it turns out we need
// access to the raw V4L2 buffer. MT21C planes have a "secret" footer containing
// metadata necessary for decompression appended to the beginning of the last
// page in the buffer. This extra data is totally unknown to Chrome abstractions
// like VideoFrame, which just assume a plane's size is determined by stride and
// height.
class MT21Decompressor {
public:
MT21Decompressor(gfx::Size resolution);
~MT21Decompressor();
void MT21ToNV12(const uint8_t* src_y,
const uint8_t* src_uv,
const size_t y_buf_size,
const size_t uv_buf_size,
uint8_t* dest_y,
uint8_t* dest_uv);
private:
// We divide the frame horizontally 4 times and distribute the job among
// the 4 CPU cores in the MT8173. Two of these cores are little cores, so we
// want to divide the task unevenly and make sure the smaller 2 tasks end up
// scheduled on the smaller cores. In order to accomplish this, we circumvent
// Chrome's threading system entirely and use raw operating system threads, so
// we can use sched_setaffinity().
//
// One alternative that was considered was breaking the decompression up into
// a bunch of little atomic tasks and using a threadpool, and just letting
// the OS scheduler figure out the division of labor. This approach has the
// significant drawback however of not only introducing more overhead, but
// more importantly, having potentially very poor memory locality.
//
// Note that we also keep threads alive and waiting between runs of the
// decompression routine. Experimental evidence has indicated that the
// overhead of start and join syscalls substantially lengthen decompression
// times, so we just use userspace semaphores for synchronization instead.
std::atomic_bool should_shutdown_ = false;
std::vector<std::thread> big_core_threads_;
std::vector<scoped_refptr<MT21DecompressionJob>> big_core_jobs_;
raw_ptr<uint8_t> big_core_pivot_;
std::vector<std::thread> little_core_threads_;
std::vector<scoped_refptr<MT21DecompressionJob>> little_core_jobs_;
raw_ptr<uint8_t> little_core_pivot_;
gfx::Size aligned_resolution_;
raw_ptr<GolombRiceTableEntry> symbol_cache_;
};
} // namespace media
#endif // MEDIA_GPU_V4L2_MT21_MT21_DECOMPRESSOR_H_