#include <assert.h>
#include <stdbool.h>
#include "aom_util/aom_pthread.h"
#include "av1/common/warped_motion.h"
#include "av1/common/thread_common.h"
#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/enc_enums.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encoder_alloc.h"
#include "av1/encoder/ethread.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/firstpass.h"
#endif
#include "av1/encoder/global_motion.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/picklpf.h"
#include "av1/encoder/rdopt.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/encoder/temporal_filter.h"
#include "av1/encoder/tpl_model.h"
static inline void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { … }
static inline void update_delta_lf_for_row_mt(AV1_COMP *cpi) { … }
void av1_row_mt_sync_read_dummy(AV1EncRowMultiThreadSync *row_mt_sync, int r,
int c) { … }
void av1_row_mt_sync_write_dummy(AV1EncRowMultiThreadSync *row_mt_sync, int r,
int c, int cols) { … }
void av1_row_mt_sync_read(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c) { … }
void av1_row_mt_sync_write(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c,
int cols) { … }
static void row_mt_sync_mem_alloc(AV1EncRowMultiThreadSync *row_mt_sync,
AV1_COMMON *cm, int rows) { … }
void av1_row_mt_sync_mem_dealloc(AV1EncRowMultiThreadSync *row_mt_sync) { … }
static inline int get_sb_rows_in_frame(AV1_COMMON *cm) { … }
static void row_mt_mem_alloc(AV1_COMP *cpi, int max_rows, int max_cols,
int alloc_row_ctx) { … }
void av1_row_mt_mem_dealloc(AV1_COMP *cpi) { … }
static inline void assign_tile_to_thread(int *thread_id_to_tile_id,
int num_tiles, int num_workers) { … }
static inline int get_next_job(TileDataEnc *const tile_data,
int *current_mi_row, int mib_size) { … }
static inline void switch_tile_and_get_next_job(
AV1_COMMON *const cm, TileDataEnc *const tile_data, int *cur_tile_id,
int *current_mi_row, int *end_of_frame, int is_firstpass,
const BLOCK_SIZE fp_block_size) { … }
#if !CONFIG_REALTIME_ONLY
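// Under first-pass row multi-threading, the worker processing a row waits on
// the completion of the top and top-right blocks of the row above. If any
// thread hits an error, mark every row of every tile as written so that no
// dependent worker is left waiting indefinitely.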
static void set_firstpass_encode_done(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
const BLOCK_SIZE fp_block_size = cpi->fp_block_size;
const int unit_height = mi_size_high[fp_block_size];
for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col];
TileInfo *tile = &tile_data->tile_info;
AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
const int unit_cols_in_tile =
av1_get_unit_cols_in_tile(tile, fp_block_size);
for (int mi_row = tile->mi_row_start, unit_row_in_tile = 0;
mi_row < tile->mi_row_end;
mi_row += unit_height, unit_row_in_tile++) {
enc_row_mt->sync_write_ptr(row_mt_sync, unit_row_in_tile,
unit_cols_in_tile - 1, unit_cols_in_tile);
}
}
}
}
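// Worker hook for first-pass row multi-threading. Returns 1 on success and 0
// when the worker exits through the error (longjmp) path.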
static int fp_enc_row_mt_worker_hook(void *arg1, void *unused) {
EncWorkerData *const thread_data = (EncWorkerData *)arg1;
AV1_COMP *const cpi = thread_data->cpi;
int thread_id = thread_data->thread_id;
AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
#if CONFIG_MULTITHREAD
pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_;
#endif
(void)unused;
struct aom_internal_error_info *const error_info = &thread_data->error_info;
MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd;
xd->error_info = error_info;
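  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns.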
if (setjmp(error_info->jmp)) {
error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
enc_row_mt->firstpass_mt_exit = true;
pthread_mutex_unlock(enc_row_mt_mutex_);
#endif
set_firstpass_encode_done(cpi);
return 0;
}
error_info->setjmp = 1;
AV1_COMMON *const cm = &cpi->common;
int cur_tile_id = enc_row_mt->thread_id_to_tile_id[thread_id];
assert(cur_tile_id != -1);
const BLOCK_SIZE fp_block_size = cpi->fp_block_size;
const int unit_height = mi_size_high[fp_block_size];
int end_of_frame = 0;
while (1) {
int current_mi_row = -1;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
bool firstpass_mt_exit = enc_row_mt->firstpass_mt_exit;
    if (!firstpass_mt_exit && !get_next_job(&cpi->tile_data[cur_tile_id],
                                            &current_mi_row, unit_height)) {
      switch_tile_and_get_next_job(cm, cpi->tile_data, &cur_tile_id,
                                   &current_mi_row, &end_of_frame, 1,
                                   fp_block_size);
}
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(enc_row_mt_mutex_);
#endif
if (firstpass_mt_exit || end_of_frame) break;
TileDataEnc *const this_tile = &cpi->tile_data[cur_tile_id];
AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync;
ThreadData *td = thread_data->td;
assert(current_mi_row != -1 &&
current_mi_row < this_tile->tile_info.mi_row_end);
const int unit_height_log2 = mi_size_high_log2[fp_block_size];
av1_first_pass_row(cpi, td, this_tile, current_mi_row >> unit_height_log2,
fp_block_size);
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
row_mt_sync->num_threads_working--;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(enc_row_mt_mutex_);
#endif
}
error_info->setjmp = 0;
return 1;
}
#endif
static void launch_loop_filter_rows(AV1_COMMON *cm, EncWorkerData *thread_data,
AV1EncRowMultiThreadInfo *enc_row_mt,
int mib_size_log2) { … }
static void set_encoding_done(AV1_COMP *cpi) { … }
static bool lpf_mt_with_enc_enabled(int pipeline_lpf_mt_with_enc,
const int filter_level[2]) { … }
static int enc_row_mt_worker_hook(void *arg1, void *unused) { … }
static int enc_worker_hook(void *arg1, void *unused) { … }
void av1_init_frame_mt(AV1_PRIMARY *ppi, AV1_COMP *cpi) { … }
void av1_init_cdef_worker(AV1_COMP *cpi) { … }
#if !CONFIG_REALTIME_ONLY
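// Hand the frame-level loop-restoration scratch buffers (rst_tmpbuf, rlbs) to
// the last loop-restoration worker. Frames with frame_parallel_level > 0 are
// skipped here, as they do not own these frame-level buffers.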
void av1_init_lr_mt_buffers(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
AV1LrSync *lr_sync = &cpi->mt_info.lr_row_sync;
if (lr_sync->sync_range) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
return;
int num_lr_workers =
av1_get_num_mod_workers_for_alloc(&cpi->ppi->p_mt_info, MOD_LR);
assert(num_lr_workers <= lr_sync->num_workers);
lr_sync->lrworkerdata[num_lr_workers - 1].rst_tmpbuf = cm->rst_tmpbuf;
lr_sync->lrworkerdata[num_lr_workers - 1].rlbs = cm->rlbs;
}
}
#endif
#if CONFIG_MULTITHREAD
void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass) { … }
#endif
int av1_get_num_mod_workers_for_alloc(const PrimaryMultiThreadInfo *p_mt_info,
MULTI_THREADED_MODULES mod_name) { … }
void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass) { … }
void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) { … }
void av1_terminate_workers(AV1_PRIMARY *ppi) { … }
static inline int is_fpmt_config(const AV1_PRIMARY *ppi,
const AV1EncoderConfig *oxcf) { … }
int av1_check_fpmt_config(AV1_PRIMARY *const ppi,
const AV1EncoderConfig *const oxcf) { … }
#define MAX_THREADS …
static inline int compute_max_num_enc_workers(
CommonModeInfoParams *const mi_params, int mib_size_log2) { … }
int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) { … }
static inline int compute_num_workers_per_frame(
const int num_workers, const int parallel_frame_count) { … }
static inline void restore_workers_after_fpmt(AV1_PRIMARY *ppi,
int parallel_frame_count,
int num_fpmt_workers_prepared);
static inline void prepare_fpmt_workers(AV1_PRIMARY *ppi,
AV1_COMP_DATA *first_cpi_data,
AVxWorkerHook hook,
int parallel_frame_count) { … }
static inline void launch_fpmt_workers(AV1_PRIMARY *ppi) { … }
static inline void restore_workers_after_fpmt(AV1_PRIMARY *ppi,
int parallel_frame_count,
int num_fpmt_workers_prepared) { … }
static inline void sync_fpmt_workers(AV1_PRIMARY *ppi,
int frames_in_parallel_set) { … }
static int get_compressed_data_hook(void *arg1, void *arg2) { … }
void av1_compress_parallel_frames(AV1_PRIMARY *const ppi,
AV1_COMP_DATA *const first_cpi_data) { … }
static inline void launch_workers(MultiThreadInfo *const mt_info,
int num_workers) { … }
static inline void sync_enc_workers(MultiThreadInfo *const mt_info,
AV1_COMMON *const cm, int num_workers) { … }
static inline void accumulate_counters_enc_workers(AV1_COMP *cpi,
int num_workers) { … }
static inline void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers) { … }
#if !CONFIG_REALTIME_ONLY
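// Set up the workers for first-pass encoding: worker 0 runs on the main
// thread's ThreadData, while the remaining workers start from a copy of the
// main thread's MACROBLOCK.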
static inline void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers) {
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &mt_info->workers[i];
EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
worker->hook = hook;
worker->data1 = thread_data;
worker->data2 = NULL;
thread_data->thread_id = i;
thread_data->start = i;
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
} else {
thread_data->td = thread_data->original_td;
thread_data->td->mb = cpi->td.mb;
}
av1_alloc_src_diff_buf(cm, &thread_data->td->mb);
}
}
#endif
static inline int compute_num_enc_row_mt_workers(const AV1_COMMON *cm,
int max_threads) { … }
static inline int compute_num_enc_tile_mt_workers(const AV1_COMMON *cm,
int max_threads) { … }
int av1_get_max_num_workers(const AV1_COMP *cpi) { … }
static int compute_num_enc_workers(const AV1_COMP *cpi, int max_workers) { … }
void av1_encode_tiles_mt(AV1_COMP *cpi) { … }
void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts,
const FRAME_COUNTS *counts) { … }
static inline void compute_max_sb_rows_cols(const AV1_COMMON *cm,
int *max_sb_rows_in_tile,
int *max_sb_cols_in_tile) { … }
#if !CONFIG_REALTIME_ONLY
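// Compute the number of workers to use for first-pass row multi-threading.
// With the top/top-right sync pattern, a row may proceed only while it stays
// two units behind the row above, so a tile can keep at most
// min((unit_cols + 1) / 2, unit_rows) threads busy.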
int av1_fp_compute_num_enc_workers(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
int total_num_threads_row_mt = 0;
TileInfo tile_info;
if (cpi->oxcf.max_threads <= 1) return 1;
for (int row = 0; row < tile_rows; row++) {
for (int col = 0; col < tile_cols; col++) {
av1_tile_init(&tile_info, cm, row, col);
const int num_mb_rows_in_tile =
av1_get_unit_rows_in_tile(&tile_info, cpi->fp_block_size);
const int num_mb_cols_in_tile =
av1_get_unit_cols_in_tile(&tile_info, cpi->fp_block_size);
total_num_threads_row_mt +=
AOMMIN((num_mb_cols_in_tile + 1) >> 1, num_mb_rows_in_tile);
}
}
return AOMMIN(cpi->oxcf.max_threads, total_num_threads_row_mt);
}
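// Return the maximum number of first-pass unit rows over all tile rows; this
// is used to size the per-row sync structures.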
static inline int fp_compute_max_mb_rows(const AV1_COMMON *cm,
BLOCK_SIZE fp_block_size) {
const int tile_rows = cm->tiles.rows;
const int unit_height_log2 = mi_size_high_log2[fp_block_size];
const int mib_size_log2 = cm->seq_params->mib_size_log2;
const int num_mi_rows = cm->mi_params.mi_rows;
const int *const row_start_sb = cm->tiles.row_start_sb;
int max_mb_rows = 0;
for (int row = 0; row < tile_rows; row++) {
const int mi_row_start = row_start_sb[row] << mib_size_log2;
const int mi_row_end =
AOMMIN(row_start_sb[row + 1] << mib_size_log2, num_mi_rows);
const int num_mb_rows_in_tile =
CEIL_POWER_OF_TWO(mi_row_end - mi_row_start, unit_height_log2);
max_mb_rows = AOMMAX(max_mb_rows, num_mb_rows_in_tile);
}
return max_mb_rows;
}
#endif
static void lpf_pipeline_mt_init(AV1_COMP *cpi, int num_workers) { … }
void av1_encode_tiles_row_mt(AV1_COMP *cpi) { … }
#if !CONFIG_REALTIME_ONLY
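// Release the source-difference buffers that fp_prepare_enc_workers()
// allocated for the worker threads; the main thread's buffer lives in
// cpi->td and is released elsewhere.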
static void dealloc_thread_data_src_diff_buf(AV1_COMP *cpi, int num_workers) {
for (int i = num_workers - 1; i >= 0; --i) {
EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i];
if (thread_data->td != &cpi->td)
av1_dealloc_src_diff_buf(&thread_data->td->mb,
av1_num_planes(&cpi->common));
}
}
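// Entry point for first-pass encoding with row multi-threading: (re)allocate
// tile data and row-sync memory when the tiling changes, reset the per-tile
// sync state, map threads to tiles, and run fp_enc_row_mt_worker_hook on the
// workers.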
void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
int *thread_id_to_tile_id = enc_row_mt->thread_id_to_tile_id;
int num_workers = 0;
  const int max_mb_rows = fp_compute_max_mb_rows(cm, cpi->fp_block_size);
const bool alloc_row_mt_mem = enc_row_mt->allocated_tile_cols != tile_cols ||
enc_row_mt->allocated_tile_rows != tile_rows ||
enc_row_mt->allocated_rows != max_mb_rows;
const bool alloc_tile_data = cpi->allocated_tiles < tile_cols * tile_rows;
assert(IMPLIES(cpi->tile_data == NULL, alloc_tile_data));
if (alloc_tile_data) {
av1_alloc_tile_data(cpi);
}
assert(IMPLIES(alloc_tile_data, alloc_row_mt_mem));
if (alloc_row_mt_mem) {
row_mt_mem_alloc(cpi, max_mb_rows, -1, 0);
}
av1_init_tile_data(cpi);
if (mt_info->num_mod_workers[MOD_FP] == 0)
num_workers = av1_fp_compute_num_enc_workers(cpi);
else
num_workers = mt_info->num_mod_workers[MOD_FP];
memset(thread_id_to_tile_id, -1,
sizeof(*thread_id_to_tile_id) * MAX_NUM_THREADS);
enc_row_mt->firstpass_mt_exit = false;
for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_index = tile_row * tile_cols + tile_col;
TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
AV1EncRowMultiThreadSync *const row_mt_sync = &this_tile->row_mt_sync;
memset(row_mt_sync->num_finished_cols, -1,
sizeof(*row_mt_sync->num_finished_cols) * max_mb_rows);
row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start;
row_mt_sync->num_threads_working = 0;
row_mt_sync->intrabc_extra_top_right_sb_delay = 0;
}
}
num_workers = AOMMIN(num_workers, mt_info->num_workers);
assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows,
num_workers);
fp_prepare_enc_workers(cpi, fp_enc_row_mt_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, cm, num_workers);
dealloc_thread_data_src_diff_buf(cpi, num_workers);
}
void av1_tpl_row_mt_sync_read_dummy(AV1TplRowMultiThreadSync *tpl_mt_sync,
int r, int c) {
(void)tpl_mt_sync;
(void)r;
(void)c;
}
void av1_tpl_row_mt_sync_write_dummy(AV1TplRowMultiThreadSync *tpl_mt_sync,
int r, int c, int cols) {
(void)tpl_mt_sync;
(void)r;
(void)c;
(void)cols;
}
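// Block until the row above (r - 1) has completed at least c + sync_range
// columns, enforcing the top/top-right dependency of TPL processing.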
void av1_tpl_row_mt_sync_read(AV1TplRowMultiThreadSync *tpl_row_mt_sync, int r,
int c) {
#if CONFIG_MULTITHREAD
int nsync = tpl_row_mt_sync->sync_range;
if (r) {
pthread_mutex_t *const mutex = &tpl_row_mt_sync->mutex_[r - 1];
pthread_mutex_lock(mutex);
while (c > tpl_row_mt_sync->num_finished_cols[r - 1] - nsync)
pthread_cond_wait(&tpl_row_mt_sync->cond_[r - 1], mutex);
pthread_mutex_unlock(mutex);
}
#else
(void)tpl_row_mt_sync;
(void)r;
(void)c;
#endif
}
void av1_tpl_row_mt_sync_write(AV1TplRowMultiThreadSync *tpl_row_mt_sync, int r,
int c, int cols) {
#if CONFIG_MULTITHREAD
int nsync = tpl_row_mt_sync->sync_range;
int cur;
int sig = 1;
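  // For the last column, push the watermark past the end of the row so that
  // every waiter on this row is released; otherwise signal only at
  // sync_range boundaries.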
if (c < cols - 1) {
cur = c;
if (c % nsync) sig = 0;
} else {
cur = cols + nsync;
}
if (sig) {
pthread_mutex_lock(&tpl_row_mt_sync->mutex_[r]);
tpl_row_mt_sync->num_finished_cols[r] =
AOMMAX(tpl_row_mt_sync->num_finished_cols[r], cur);
pthread_cond_signal(&tpl_row_mt_sync->cond_[r]);
pthread_mutex_unlock(&tpl_row_mt_sync->mutex_[r]);
}
#else
(void)tpl_row_mt_sync;
(void)r;
(void)c;
(void)cols;
#endif
}
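// Due to the top-right dependency, a TPL worker on an mb_row waits on the
// completion of the top and top-right blocks. If a thread encounters an
// error, mark the TPL processing of every mb_row as complete so that
// dependent workers do not wait indefinitely.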
static inline void set_mode_estimation_done(AV1_COMP *cpi) {
const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
TplParams *const tpl_data = &cpi->ppi->tpl_data;
const BLOCK_SIZE bsize =
convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
const int mi_height = mi_size_high[bsize];
AV1TplRowMultiThreadInfo *const tpl_row_mt = &cpi->mt_info.tpl_row_mt;
const int tplb_cols_in_tile =
ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]);
for (int mi_row = 0, tplb_row = 0; mi_row < mi_params->mi_rows;
mi_row += mi_height, tplb_row++) {
(*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row,
tplb_cols_in_tile - 1, tplb_cols_in_tile);
}
}
static int tpl_worker_hook(void *arg1, void *unused) {
(void)unused;
EncWorkerData *thread_data = (EncWorkerData *)arg1;
AV1_COMP *cpi = thread_data->cpi;
AV1_COMMON *cm = &cpi->common;
MACROBLOCK *x = &thread_data->td->mb;
MACROBLOCKD *xd = &x->e_mbd;
TplTxfmStats *tpl_txfm_stats = &thread_data->td->tpl_txfm_stats;
TplBuffers *tpl_tmp_buffers = &thread_data->td->tpl_tmp_buffers;
CommonModeInfoParams *mi_params = &cm->mi_params;
int num_active_workers = cpi->ppi->tpl_data.tpl_mt_sync.num_threads_working;
struct aom_internal_error_info *const error_info = &thread_data->error_info;
xd->error_info = error_info;
AV1TplRowMultiThreadInfo *const tpl_row_mt = &cpi->mt_info.tpl_row_mt;
(void)tpl_row_mt;
#if CONFIG_MULTITHREAD
pthread_mutex_t *tpl_error_mutex_ = tpl_row_mt->mutex_;
#endif
if (setjmp(error_info->jmp)) {
error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(tpl_error_mutex_);
tpl_row_mt->tpl_mt_exit = true;
pthread_mutex_unlock(tpl_error_mutex_);
#endif
set_mode_estimation_done(cpi);
return 0;
}
error_info->setjmp = 1;
BLOCK_SIZE bsize = convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
TX_SIZE tx_size = max_txsize_lookup[bsize];
int mi_height = mi_size_high[bsize];
av1_init_tpl_txfm_stats(tpl_txfm_stats);
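  // Rows are distributed round-robin: worker k processes rows k,
  // k + num_active_workers, k + 2 * num_active_workers, and so on.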
for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
mi_row += num_active_workers * mi_height) {
av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height,
cpi->oxcf.border_in_pixels);
xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
xd->mb_to_bottom_edge =
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
av1_mc_flow_dispenser_row(cpi, tpl_txfm_stats, tpl_tmp_buffers, x, mi_row,
bsize, tx_size);
}
error_info->setjmp = 0;
return 1;
}
void av1_tpl_dealloc(AV1TplRowMultiThreadSync *tpl_sync) {
assert(tpl_sync != NULL);
#if CONFIG_MULTITHREAD
if (tpl_sync->mutex_ != NULL) {
for (int i = 0; i < tpl_sync->rows; ++i)
pthread_mutex_destroy(&tpl_sync->mutex_[i]);
aom_free(tpl_sync->mutex_);
}
if (tpl_sync->cond_ != NULL) {
for (int i = 0; i < tpl_sync->rows; ++i)
pthread_cond_destroy(&tpl_sync->cond_[i]);
aom_free(tpl_sync->cond_);
}
#endif
aom_free(tpl_sync->num_finished_cols);
av1_zero(*tpl_sync);
}
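// Allocate and initialize the TPL row-sync structure for mb_rows rows. A
// sync_range of 1 makes the writer signal after every finished column.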
static void av1_tpl_alloc(AV1TplRowMultiThreadSync *tpl_sync, AV1_COMMON *cm,
int mb_rows) {
tpl_sync->rows = mb_rows;
#if CONFIG_MULTITHREAD
{
CHECK_MEM_ERROR(cm, tpl_sync->mutex_,
aom_malloc(sizeof(*tpl_sync->mutex_) * mb_rows));
if (tpl_sync->mutex_) {
for (int i = 0; i < mb_rows; ++i)
pthread_mutex_init(&tpl_sync->mutex_[i], NULL);
}
CHECK_MEM_ERROR(cm, tpl_sync->cond_,
aom_malloc(sizeof(*tpl_sync->cond_) * mb_rows));
if (tpl_sync->cond_) {
for (int i = 0; i < mb_rows; ++i)
pthread_cond_init(&tpl_sync->cond_[i], NULL);
}
}
#endif
CHECK_MEM_ERROR(cm, tpl_sync->num_finished_cols,
aom_malloc(sizeof(*tpl_sync->num_finished_cols) * mb_rows));
tpl_sync->sync_range = 1;
}
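// Set up the TPL workers: worker 0 reuses the main thread's ThreadData, while
// the other workers get a copy of the main MACROBLOCK plus their own OBMC and
// temporary TPL buffers.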
static inline void prepare_tpl_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers) {
MultiThreadInfo *mt_info = &cpi->mt_info;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *worker = &mt_info->workers[i];
EncWorkerData *thread_data = &mt_info->tile_thr_data[i];
worker->hook = hook;
worker->data1 = thread_data;
worker->data2 = NULL;
thread_data->thread_id = i;
thread_data->start = i;
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
} else {
thread_data->td = thread_data->original_td;
}
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer);
if (!tpl_alloc_temp_buffers(&thread_data->td->tpl_tmp_buffers,
cpi->ppi->tpl_data.tpl_bsize_1d)) {
aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
"Error allocating tpl data");
}
thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
}
}
}
#if CONFIG_BITRATE_ACCURACY
static void tpl_accumulate_txfm_stats(ThreadData *main_td,
const MultiThreadInfo *mt_info,
int num_workers) {
TplTxfmStats *accumulated_stats = &main_td->tpl_txfm_stats;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &mt_info->workers[i];
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
ThreadData *td = thread_data->td;
if (td != main_td) {
const TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats;
av1_accumulate_tpl_txfm_stats(tpl_txfm_stats, accumulated_stats);
}
}
}
#endif
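// Multi-threaded TPL motion-compensated flow dispenser: resize the row-sync
// structure if mb_rows changed, reset the completion watermarks, run
// tpl_worker_hook on the workers, then release the per-worker buffers.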
void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
CommonModeInfoParams *mi_params = &cm->mi_params;
MultiThreadInfo *mt_info = &cpi->mt_info;
TplParams *tpl_data = &cpi->ppi->tpl_data;
AV1TplRowMultiThreadSync *tpl_sync = &tpl_data->tpl_mt_sync;
int mb_rows = mi_params->mb_rows;
int num_workers =
AOMMIN(mt_info->num_mod_workers[MOD_TPL], mt_info->num_workers);
if (mb_rows != tpl_sync->rows) {
av1_tpl_dealloc(tpl_sync);
av1_tpl_alloc(tpl_sync, cm, mb_rows);
}
tpl_sync->num_threads_working = num_workers;
mt_info->tpl_row_mt.tpl_mt_exit = false;
memset(tpl_sync->num_finished_cols, -1,
sizeof(*tpl_sync->num_finished_cols) * mb_rows);
prepare_tpl_workers(cpi, tpl_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, cm, num_workers);
#if CONFIG_BITRATE_ACCURACY
tpl_accumulate_txfm_stats(&cpi->td, &cpi->mt_info, num_workers);
#endif
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *thread_data = &mt_info->tile_thr_data[i];
ThreadData *td = thread_data->td;
if (td != &cpi->td) tpl_dealloc_temp_buffers(&td->tpl_tmp_buffers);
}
}
void av1_tf_mt_dealloc(AV1TemporalFilterSync *tf_sync) {
assert(tf_sync != NULL);
#if CONFIG_MULTITHREAD
if (tf_sync->mutex_ != NULL) {
pthread_mutex_destroy(tf_sync->mutex_);
aom_free(tf_sync->mutex_);
}
#endif
tf_sync->next_tf_row = 0;
}
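// Hand out the next temporal-filter row under the mutex. Returns 1 and sets
// *current_mb_row when a row is available, and 0 once the rows are exhausted
// or a worker has signalled an error exit.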
static inline int tf_get_next_job(AV1TemporalFilterSync *tf_mt_sync,
int *current_mb_row, int mb_rows) {
int do_next_row = 0;
#if CONFIG_MULTITHREAD
pthread_mutex_t *tf_mutex_ = tf_mt_sync->mutex_;
pthread_mutex_lock(tf_mutex_);
#endif
if (!tf_mt_sync->tf_mt_exit && tf_mt_sync->next_tf_row < mb_rows) {
*current_mb_row = tf_mt_sync->next_tf_row;
tf_mt_sync->next_tf_row++;
do_next_row = 1;
}
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(tf_mutex_);
#endif
return do_next_row;
}
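// Temporal-filter worker: saves the MACROBLOCKD state, filters the rows
// fetched via tf_get_next_job(), and restores the state before returning.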
static int tf_worker_hook(void *arg1, void *unused) {
(void)unused;
EncWorkerData *thread_data = (EncWorkerData *)arg1;
AV1_COMP *cpi = thread_data->cpi;
ThreadData *td = thread_data->td;
TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
AV1TemporalFilterSync *tf_sync = &cpi->mt_info.tf_sync;
const struct scale_factors *scale = &cpi->tf_ctx.sf;
#if CONFIG_MULTITHREAD
pthread_mutex_t *tf_mutex_ = tf_sync->mutex_;
#endif
MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd;
struct aom_internal_error_info *const error_info = &thread_data->error_info;
xd->error_info = error_info;
if (setjmp(error_info->jmp)) {
error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(tf_mutex_);
tf_sync->tf_mt_exit = true;
pthread_mutex_unlock(tf_mutex_);
#endif
return 0;
}
error_info->setjmp = 1;
const int num_planes = av1_num_planes(&cpi->common);
assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
MACROBLOCKD *mbd = &td->mb.e_mbd;
uint8_t *input_buffer[MAX_MB_PLANE];
MB_MODE_INFO **input_mb_mode_info;
tf_save_state(mbd, &input_mb_mode_info, input_buffer, num_planes);
tf_setup_macroblockd(mbd, &td->tf_data, scale);
int current_mb_row = -1;
  while (tf_get_next_job(tf_sync, &current_mb_row, tf_ctx->mb_rows))
av1_tf_do_filtering_row(cpi, td, current_mb_row);
tf_restore_state(mbd, input_mb_mode_info, input_buffer, num_planes);
error_info->setjmp = 0;
return 1;
}
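// Set up the temporal-filter workers and reset the shared row counter; worker
// threads receive a copy of the main MACROBLOCK and their own filter buffers.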
static void prepare_tf_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers, int is_highbitdepth) {
MultiThreadInfo *mt_info = &cpi->mt_info;
mt_info->tf_sync.next_tf_row = 0;
mt_info->tf_sync.tf_mt_exit = false;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *worker = &mt_info->workers[i];
EncWorkerData *thread_data = &mt_info->tile_thr_data[i];
worker->hook = hook;
worker->data1 = thread_data;
worker->data2 = NULL;
thread_data->thread_id = i;
thread_data->start = i;
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
} else {
thread_data->td = thread_data->original_td;
}
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer);
if (!tf_alloc_and_reset_data(&thread_data->td->tf_data,
cpi->tf_ctx.num_pels, is_highbitdepth)) {
aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
"Error allocating temporal filter data");
}
}
}
}
static void tf_dealloc_thread_data(AV1_COMP *cpi, int num_workers,
int is_highbitdepth) {
MultiThreadInfo *mt_info = &cpi->mt_info;
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *thread_data = &mt_info->tile_thr_data[i];
ThreadData *td = thread_data->td;
if (td != &cpi->td) tf_dealloc_data(&td->tf_data, is_highbitdepth);
}
}
static void tf_accumulate_frame_diff(AV1_COMP *cpi, int num_workers) {
FRAME_DIFF *total_diff = &cpi->td.tf_data.diff;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &cpi->mt_info.workers[i];
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
ThreadData *td = thread_data->td;
FRAME_DIFF *diff = &td->tf_data.diff;
if (td != &cpi->td) {
total_diff->sse += diff->sse;
total_diff->sum += diff->sum;
}
}
}
void av1_tf_do_filtering_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
MultiThreadInfo *mt_info = &cpi->mt_info;
const int is_highbitdepth = cpi->tf_ctx.is_highbitdepth;
int num_workers =
AOMMIN(mt_info->num_mod_workers[MOD_TF], mt_info->num_workers);
prepare_tf_workers(cpi, tf_worker_hook, num_workers, is_highbitdepth);
launch_workers(mt_info, num_workers);
sync_enc_workers(mt_info, cm, num_workers);
tf_accumulate_frame_diff(cpi, num_workers);
tf_dealloc_thread_data(cpi, num_workers, is_highbitdepth);
}
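// Global-motion jobs are grouped by search direction (past vs. future
// reference frames). Fetch the next reference frame to process in cur_dir,
// unless that direction is exhausted or was pruned via early_exit.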
static inline int get_next_gm_job(AV1_COMP *cpi, int *frame_idx, int cur_dir) {
GlobalMotionInfo *gm_info = &cpi->gm_info;
GlobalMotionJobInfo *job_info = &cpi->mt_info.gm_sync.job_info;
int total_refs = gm_info->num_ref_frames[cur_dir];
int8_t cur_frame_to_process = job_info->next_frame_to_process[cur_dir];
if (cur_frame_to_process < total_refs && !job_info->early_exit[cur_dir]) {
*frame_idx = gm_info->reference_frames[cur_dir][cur_frame_to_process].frame;
job_info->next_frame_to_process[cur_dir] += 1;
return 1;
}
return 0;
}
static inline void switch_direction(AV1_COMP *cpi, int *frame_idx,
int *cur_dir) {
if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search) return;
*cur_dir = !(*cur_dir);
get_next_gm_job(cpi, frame_idx, *(cur_dir));
}
static int gm_mt_worker_hook(void *arg1, void *unused) {
(void)unused;
EncWorkerData *thread_data = (EncWorkerData *)arg1;
AV1_COMP *cpi = thread_data->cpi;
GlobalMotionInfo *gm_info = &cpi->gm_info;
AV1GlobalMotionSync *gm_sync = &cpi->mt_info.gm_sync;
GlobalMotionJobInfo *job_info = &gm_sync->job_info;
int thread_id = thread_data->thread_id;
GlobalMotionData *gm_thread_data = &thread_data->td->gm_data;
#if CONFIG_MULTITHREAD
pthread_mutex_t *gm_mt_mutex_ = gm_sync->mutex_;
#endif
MACROBLOCKD *const xd = &thread_data->td->mb.e_mbd;
struct aom_internal_error_info *const error_info = &thread_data->error_info;
xd->error_info = error_info;
if (setjmp(error_info->jmp)) {
error_info->setjmp = 0;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(gm_mt_mutex_);
gm_sync->gm_mt_exit = true;
pthread_mutex_unlock(gm_mt_mutex_);
#endif
return 0;
}
error_info->setjmp = 1;
int cur_dir = job_info->thread_id_to_dir[thread_id];
bool gm_mt_exit = false;
while (1) {
int ref_buf_idx = -1;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(gm_mt_mutex_);
#endif
gm_mt_exit = gm_sync->gm_mt_exit;
if (!gm_mt_exit && !get_next_gm_job(cpi, &ref_buf_idx, cur_dir)) {
switch_direction(cpi, &ref_buf_idx, &cur_dir);
}
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(gm_mt_mutex_);
#endif
if (gm_mt_exit || ref_buf_idx == -1) break;
av1_compute_gm_for_valid_ref_frames(
cpi, error_info, gm_info->ref_buf, ref_buf_idx,
gm_thread_data->motion_models, gm_thread_data->segment_map,
gm_info->segment_map_w, gm_info->segment_map_h);
#if CONFIG_MULTITHREAD
pthread_mutex_lock(gm_mt_mutex_);
#endif
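    // References within a direction are ordered nearest-first, so when
    // pruning is enabled and this reference yields no better than a
    // TRANSLATION model, the remaining (farther) references in the same
    // direction are skipped.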
if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search &&
cpi->common.global_motion[ref_buf_idx].wmtype <= TRANSLATION)
job_info->early_exit[cur_dir] = 1;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(gm_mt_mutex_);
#endif
}
error_info->setjmp = 0;
return 1;
}
static inline void prepare_gm_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers) {
MultiThreadInfo *mt_info = &cpi->mt_info;
mt_info->gm_sync.gm_mt_exit = false;
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *worker = &mt_info->workers[i];
EncWorkerData *thread_data = &mt_info->tile_thr_data[i];
worker->hook = hook;
worker->data1 = thread_data;
worker->data2 = NULL;
thread_data->thread_id = i;
thread_data->start = i;
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
} else {
thread_data->td = thread_data->original_td;
}
if (thread_data->td != &cpi->td)
gm_alloc_data(cpi, &thread_data->td->gm_data);
}
}
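// Distribute the workers across the two search directions in round-robin
// order.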
static inline void assign_thread_to_dir(int8_t *thread_id_to_dir,
int num_workers) {
int8_t frame_dir_idx = 0;
for (int i = 0; i < num_workers; i++) {
thread_id_to_dir[i] = frame_dir_idx++;
if (frame_dir_idx == MAX_DIRECTIONS) frame_dir_idx = 0;
}
}
static inline int compute_gm_workers(const AV1_COMP *cpi) {
int total_refs =
cpi->gm_info.num_ref_frames[0] + cpi->gm_info.num_ref_frames[1];
int num_gm_workers = cpi->sf.gm_sf.prune_ref_frame_for_gm_search
? AOMMIN(MAX_DIRECTIONS, total_refs)
: total_refs;
num_gm_workers = AOMMIN(num_gm_workers, cpi->mt_info.num_workers);
  return num_gm_workers;
}
static inline void gm_dealloc_thread_data(AV1_COMP *cpi, int num_workers) {
MultiThreadInfo *mt_info = &cpi->mt_info;
for (int j = 0; j < num_workers; j++) {
EncWorkerData *thread_data = &mt_info->tile_thr_data[j];
ThreadData *td = thread_data->td;
if (td != &cpi->td) gm_dealloc_data(&td->gm_data);
}
}
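// Multi-threaded global motion estimation: reset the job queue, map workers
// to search directions, and run gm_mt_worker_hook on the workers.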
void av1_global_motion_estimation_mt(AV1_COMP *cpi) {
GlobalMotionJobInfo *job_info = &cpi->mt_info.gm_sync.job_info;
av1_zero(*job_info);
int num_workers = compute_gm_workers(cpi);
assign_thread_to_dir(job_info->thread_id_to_dir, num_workers);
prepare_gm_workers(cpi, gm_mt_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, &cpi->common, num_workers);
gm_dealloc_thread_data(cpi, num_workers);
}
#endif
static inline int get_next_job_allintra(
AV1EncRowMultiThreadSync *const row_mt_sync, const int mi_row_end,
int *current_mi_row, int mib_size) { … }
static inline void prepare_wiener_var_workers(AV1_COMP *const cpi,
AVxWorkerHook hook,
const int num_workers) { … }
static void set_mb_wiener_var_calc_done(AV1_COMP *const cpi) { … }
static int cal_mb_wiener_var_hook(void *arg1, void *unused) { … }
static void dealloc_mb_wiener_var_mt_data(AV1_COMP *cpi, int num_workers) { … }
void av1_calc_mb_wiener_var_mt(AV1_COMP *cpi, int num_workers,
double *sum_rec_distortion,
double *sum_est_rate) { … }
static int compare_tile_order(const void *a, const void *b) { … }
static inline int get_next_pack_bs_tile_idx(
AV1EncPackBSSync *const pack_bs_sync, const int num_tiles) { … }
static inline size_t get_bs_chunk_size(int tg_or_tile_size,
const int frame_or_tg_size,
size_t *remain_buf_size,
size_t max_buf_size, int is_last_chunk) { … }
static void init_tile_pack_bs_params(AV1_COMP *const cpi, uint8_t *const dst,
struct aom_write_bit_buffer *saved_wb,
PackBSParams *const pack_bs_params_arr,
uint8_t obu_extn_header) { … }
static int pack_bs_worker_hook(void *arg1, void *arg2) { … }
static void prepare_pack_bs_workers(AV1_COMP *const cpi,
PackBSParams *const pack_bs_params,
AVxWorkerHook hook, const int num_workers) { … }
static void accumulate_pack_bs_data(
AV1_COMP *const cpi, const PackBSParams *const pack_bs_params_arr,
uint8_t *const dst, uint32_t *total_size, const FrameHeaderInfo *fh_info,
int *const largest_tile_id, unsigned int *max_tile_size,
uint32_t *const obu_header_size, uint8_t **tile_data_start,
const int num_workers) { … }
void av1_write_tile_obu_mt(
AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size,
struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header,
const FrameHeaderInfo *fh_info, int *const largest_tile_id,
unsigned int *max_tile_size, uint32_t *const obu_header_size,
uint8_t **tile_data_start, const int num_workers) { … }
void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync) { … }
static void update_next_job_info(AV1CdefSync *cdef_sync, int nvfb, int nhfb) { … }
static inline void cdef_reset_job_info(AV1CdefSync *cdef_sync) { … }
static inline int cdef_get_next_job(AV1CdefSync *cdef_sync,
CdefSearchCtx *cdef_search_ctx,
volatile int *cur_fbr,
volatile int *cur_fbc,
volatile int *sb_count) { … }
static int cdef_filter_block_worker_hook(void *arg1, void *arg2) { … }
static void prepare_cdef_workers(AV1_COMP *cpi, AVxWorkerHook hook,
int num_workers) { … }
void av1_cdef_mse_calc_frame_mt(AV1_COMP *cpi) { … }
static inline int compute_num_tf_workers(const AV1_COMP *cpi) { … }
static inline int compute_num_tpl_workers(AV1_COMP *cpi) { … }
static inline int compute_num_lf_workers(AV1_COMP *cpi) { … }
static inline int compute_num_cdef_workers(AV1_COMP *cpi) { … }
static inline int compute_num_lr_workers(AV1_COMP *cpi) { … }
static inline int compute_num_pack_bs_workers(AV1_COMP *cpi) { … }
static inline int compute_num_ai_workers(AV1_COMP *cpi) { … }
static int compute_num_mod_workers(AV1_COMP *cpi,
MULTI_THREADED_MODULES mod_name) { … }
void av1_compute_num_workers_for_mt(AV1_COMP *cpi) { … }