/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "av1/common/reconinter.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/nonrd_opt.h"
#include "av1/encoder/rdopt.h"

static const SCAN_ORDER av1_fast_idtx_scan_order_16x16 = …;

#define DECLARE_BLOCK_YRD_BUFFERS() …

#define DECLARE_BLOCK_YRD_VARS() …

#define DECLARE_LOOP_VARS_BLOCK_YRD() …

static AOM_FORCE_INLINE void update_yrd_loop_vars(
    MACROBLOCK *x, int *skippable, int step, int ncoeffs,
    int16_t *const low_coeff, int16_t *const low_qcoeff,
    int16_t *const low_dqcoeff, RD_STATS *this_rdc, int *eob_cost,
    int tx_blk_id) { … }

static inline void aom_process_hadamard_lp_8x16(MACROBLOCK *x,
                                                int max_blocks_high,
                                                int max_blocks_wide,
                                                int num_4x4_w, int step,
                                                int block_step) { … }

#if CONFIG_AV1_HIGHBITDEPTH
#define DECLARE_BLOCK_YRD_HBD_VARS …

// High bit depth helper (tran_low_t coefficients) that folds the result for
// the block indexed by tx_blk_id into the running RD statistics:
//  - the block counts as skippable when ncoeffs is 0; that flag is AND-ed
//    into *skippable and stored in x->txfm_search_info.blk_skip[tx_blk_id];
//  - get_msb(ncoeffs + 1) is added to *eob_cost;
//  - this_rdc->rate grows by |qcoeff[0]| when ncoeffs is exactly 1, or by
//    aom_satd() of qcoeff when ncoeffs is greater than 1 (nothing is added
//    when ncoeffs is 0);
//  - this_rdc->dist grows by av1_highbd_block_error() between coeff and
//    dqcoeff at bit depth xd->bd, shifted right by 2.
// aom_satd() and av1_highbd_block_error() are both passed step << 4 as their
// size argument (presumably the number of coefficients in the block --
// TODO confirm against the callers).
static AOM_FORCE_INLINE void update_yrd_loop_vars_hbd(
    MACROBLOCK *x, int *skippable, int step, int ncoeffs,
    tran_low_t *const coeff, tran_low_t *const qcoeff,
    tran_low_t *const dqcoeff, RD_STATS *this_rdc, int *eob_cost,
    int tx_blk_id) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const int is_txfm_skip = (ncoeffs == 0);
  *skippable &= is_txfm_skip;
  x->txfm_search_info.blk_skip[tx_blk_id] = is_txfm_skip;
  *eob_cost += get_msb(ncoeffs + 1);

  // Only handed to av1_highbd_block_error() by address (assumed to be an
  // output argument); its value is never read in this function.
  int64_t dummy;
  if (ncoeffs == 1)
    this_rdc->rate += (int)abs(qcoeff[0]);
  else if (ncoeffs > 1)
    this_rdc->rate += aom_satd(qcoeff, step << 4);

  this_rdc->dist +=
      av1_highbd_block_error(coeff, dqcoeff, step << 4, &dummy, xd->bd) >> 2;
}
#endif

/*!\brief Calculates RD Cost using Hadamard transform.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Calculates RD Cost using Hadamard transform. For low bit depth this function
 * uses low-precision set of functions (16-bit) and 32 bit for high bit depth
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    this_rdc       Pointer to calculated RD Cost
 * \param[in]    skippable      Pointer to a flag indicating possible tx skip
 * \param[in]    bsize          Current block size
 * \param[in]    tx_size        Transform size
 *
 * \remark Nothing is returned. Instead, calculated RD cost is placed to
 * \c this_rdc. \c skippable flag is set if there is no non-zero quantized
 * coefficients for Hadamard transform
 */
void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
                   BLOCK_SIZE bsize, TX_SIZE tx_size) { … }

// Explicitly enumerate the cases so the compiler can generate SIMD for the
// function. According to the disassembler, gcc generates SSE code for each of
// the possible block sizes. The hottest case is tx_width 16, which takes up
// about 8% of the self cycle of av1_nonrd_pick_inter_mode_sb. Since
// av1_nonrd_pick_inter_mode_sb takes up about 3% of total encoding time, the
// potential room of improvement for writing AVX2 optimization is only 3% * 8% =
// 0.24% of total encoding time.
static inline void scale_square_buf_vals(int16_t *dst, int tx_width,
                                         const int16_t *src,
                                         int src_stride) { … }

/*!\brief Calculates RD Cost when the block uses Identity transform.
 * Note that this function is only for low bit depth encoding, since it
 * is called in real-time mode for now, which sets high bit depth to 0:
 * -DCONFIG_AV1_HIGHBITDEPTH=0
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Calculates RD Cost. Per the note above, this path covers low bit depth
 * only, so the low-precision (16-bit) set of functions is expected here
 * (NOTE(review): confirm against the implementation).
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    pred_buf       Pointer to the prediction buffer
 * \param[in]    pred_stride    Stride for the prediction buffer
 * \param[in]    this_rdc       Pointer to calculated RD Cost
 * \param[in]    skippable      Pointer to a flag indicating possible tx skip
 * \param[in]    bsize          Current block size
 * \param[in]    tx_size        Transform size
 *
 * \remark Nothing is returned. Instead, calculated RD cost is placed to
 * \c this_rdc. \c skippable flag is set if all coefficients are zero.
 */
void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
                        int pred_stride, RD_STATS *this_rdc, int *skippable,
                        BLOCK_SIZE bsize, TX_SIZE tx_size) { … }

int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               RD_STATS *this_rdc, int start_plane,
                               int stop_plane) { … }

static void compute_intra_yprediction(const AV1_COMMON *cm,
                                      PREDICTION_MODE mode, BLOCK_SIZE bsize,
                                      MACROBLOCK *x, MACROBLOCKD *xd) { … }

// Checks whether Intra mode needs to be pruned based on
// 'intra_y_mode_bsize_mask_nrd' and 'prune_hv_pred_modes_using_blksad'
// speed features.
static inline bool is_prune_intra_mode(
    AV1_COMP *cpi, int mode_index, int force_intra_check, BLOCK_SIZE bsize,
    uint8_t segment_id, SOURCE_SAD source_sad_nonrd,
    uint8_t color_sensitivity[MAX_MB_PLANE - 1]) { … }

/*!\brief Estimation of RD cost of an intra mode for Non-RD optimized case.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Calculates RD Cost for an intra mode for a single TX block using Hadamard
 * transform.
 * \param[in]    plane          Color plane
 * \param[in]    block          Index of a TX block in a prediction block
 * \param[in]    row            Row of a current TX block
 * \param[in]    col            Column of a current TX block
 * \param[in]    plane_bsize    Block size of a current prediction block
 * \param[in]    tx_size        Transform size
 * \param[in]    arg            Pointer to a structure that holds parameters
 *                              for intra mode search
 *
 * \remark Nothing is returned. Instead, best mode and RD Cost of the best mode
 * are set in \c args->rdc and \c args->mode
 */
void av1_estimate_block_intra(int plane, int block, int row, int col,
                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                              void *arg) { … }

/*!\brief Estimates best intra mode for inter mode search
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 *
 * Uses heuristics based on the best inter mode, block size, and other factors
 * to decide whether to check intra modes. If so, estimates and selects the
 * best intra mode from the reduced set of intra modes (max 4 intra modes
 * checked)
 *
 * \param[in]    cpi                 Top-level encoder structure
 * \param[in]    x                   Pointer to structure holding all the
 *                                   data for the current macroblock
 * \param[in]    bsize               Current block size
 * \param[in]    best_early_term     Flag, indicating that TX for the
 *                                   best inter mode was skipped
 * \param[in]    ref_cost_intra      Cost of signalling intra mode
 * \param[in]    reuse_prediction    Flag, indicating prediction re-use
 * \param[in]    orig_dst            Original destination buffer
 * \param[in]    tmp_buffers         Pointer to temporary buffers for
 *                                   prediction re-use
 * \param[out]   this_mode_pred      Pointer to store prediction buffer
 *                                   for prediction re-use
 * \param[in]    best_rdc            Pointer to RD cost for the best
 *                                   selected intra mode
 * \param[in]    best_pickmode       Pointer to a structure containing
 *                                   best mode picked so far
 * \param[in]    ctx                 Pointer to structure holding coding
 *                                   contexts and modes for the block
 *
 * \remark Nothing is returned. Instead, calculated RD cost is placed to
 * \c best_rdc and best selected mode is placed to \c best_pickmode
 *
 */
void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                             int best_early_term, unsigned int ref_cost_intra,
                             int reuse_prediction, struct buf_2d *orig_dst,
                             PRED_BUFFER *tmp_buffers,
                             PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
                             BEST_PICKMODE *best_pickmode,
                             PICK_MODE_CONTEXT *ctx) { … }