#include <immintrin.h>
#include "config/av1_rtcd.h"
#include "av1/common/warped_motion.h"
#include "aom_dsp/x86/synonyms.h"
// Four file-local, read-only tables of 32 uint8_t entries each, declared
// through DECLARE_ALIGNED with an alignment argument of 32.
// NOTE(review): the initializers are elided in this view. Judging by the
// names only, these are presumably byte-shuffle masks for an "alpha == 0"
// horizontal-filter path, one table per coefficient pair (01, 23, 45, 67)
// -- confirm against the initializers and their users.
DECLARE_ALIGNED(32, static const uint8_t, shuffle_alpha0_mask01_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_alpha0_mask23_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_alpha0_mask45_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_alpha0_mask67_avx2[32]) = …;
// Four file-local, read-only tables of 32 uint8_t entries each, declared
// through DECLARE_ALIGNED with an alignment argument of 32.
// NOTE(review): the initializers are elided in this view. Judging by the
// names only, these are presumably byte-shuffle masks for a "gamma == 0"
// vertical-filter path, indexed 0..3 -- confirm against the initializers and
// their users.
DECLARE_ALIGNED(32, static const uint8_t, shuffle_gamma0_mask0_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_gamma0_mask1_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_gamma0_mask2_avx2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t, shuffle_gamma0_mask3_avx2[32]) = …;
// Four file-local, read-only tables of 32 uint8_t entries each, declared
// through DECLARE_ALIGNED with an alignment argument of 32.
// NOTE(review): the initializers are elided in this view. Presumably these
// are the byte-shuffle masks that the horizontal-filter helpers receive via
// their `shuffle_src` parameter -- confirm where they are loaded.
DECLARE_ALIGNED(32, static const uint8_t,
shuffle_src0[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t,
shuffle_src1[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t,
shuffle_src2[32]) = …;
DECLARE_ALIGNED(32, static const uint8_t,
shuffle_src3[32]) = …;
// Horizontal-stage helper operating on a single __m256i of source data.
//
// Parameters (from the signature):
//   src         - source vector, passed by value.
//   horz_out    - writable __m256i array; presumably receives the result at
//                 index `row` -- TODO confirm.
//   coeff       - __m256i array of filter coefficients (non-const pointer).
//   shuffle_src - read-only __m256i array of shuffle masks.
//   round_const - read-only rounding constant.
//   shift       - read-only __m128i shift count.
//   row         - row index.
//
// NOTE(review): the body is elided in this view; the roles above other than
// the types are inferred from the parameter names -- confirm against the
// implementation.
static inline void filter_src_pixels_avx2(const __m256i src, __m256i *horz_out,
__m256i *coeff,
const __m256i *shuffle_src,
const __m256i *round_const,
const __m128i *shift, int row) { … }
// Fills the caller-provided `coeff` array from the integer inputs `alpha`,
// `beta` and `sx`.
// NOTE(review): the body is elided in this view. Presumably this is the
// general-case horizontal coefficient setup (both alpha and beta non-zero);
// how many entries of `coeff` are written cannot be determined from here --
// confirm against the implementation.
static inline void prepare_horizontal_filter_coeff_avx2(int alpha, int beta,
int sx,
__m256i *coeff) { … }
// Variant of the horizontal coefficient setup that takes only `alpha` and
// `sx` (no `beta` parameter) and writes through `coeff`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization for beta == 0 -- confirm against the implementation.
static inline void prepare_horizontal_filter_coeff_beta0_avx2(int alpha, int sx,
__m256i *coeff) { … }
// Variant of the horizontal coefficient setup that takes only `beta` and
// `sx` (no `alpha` parameter) and writes through `coeff`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization for alpha == 0, and it presumably relies on the
// shuffle_alpha0_mask*_avx2 tables -- confirm against the implementation.
static inline void prepare_horizontal_filter_coeff_alpha0_avx2(int beta, int sx,
__m256i *coeff) { … }
// Horizontal-stage helper that takes the source vector together with the
// warp parameters (`sx`, `alpha`, `beta`) rather than a prepared coefficient
// array.
//
// Parameters (from the signature):
//   src         - source vector, passed by value.
//   horz_out    - writable __m256i array for the horizontal-stage output.
//   sx          - integer position/offset input.
//   alpha, beta - integer warp parameters.
//   row         - row index.
//   shuffle_src - read-only __m256i array of shuffle masks.
//   round_const - read-only rounding constant.
//   shift       - read-only __m128i shift count.
//
// NOTE(review): the body is elided in this view. Presumably it prepares the
// coefficients and then filters `src` into horz_out[row] -- confirm against
// the implementation.
static inline void horizontal_filter_avx2(const __m256i src, __m256i *horz_out,
int sx, int alpha, int beta, int row,
const __m256i *shuffle_src,
const __m256i *round_const,
const __m128i *shift) { … }
// Coefficient-setup helper taking `alpha` and `sx` and writing through
// `coeff`; it has the same parameter list as
// prepare_horizontal_filter_coeff_beta0_avx2 but no `_avx2` suffix.
// NOTE(review): the body is elided in this view, so how this differs from
// the `_beta0_avx2` variant (e.g. how many rows of coefficients it produces)
// cannot be determined from here -- confirm against the implementation.
static inline void prepare_horizontal_filter_coeff(int alpha, int sx,
__m256i *coeff) { … }
// Horizontal filtering stage of the warp, reading 8-bit pixels from `ref`
// and writing __m256i results through `horz_out`.
//
// Parameters (from the signature):
//   ref         - read-only 8-bit reference buffer.
//   horz_out    - writable __m256i output array.
//   stride      - integer stride, presumably of `ref` -- TODO confirm.
//   ix4, iy4    - 32-bit integer position inputs.
//   sx4         - 32-bit integer sub-position input.
//   alpha, beta - integer warp parameters.
//   p_height    - integer height of the prediction area (assumed from name).
//   height      - integer height, presumably of the reference frame.
//   i           - integer index, presumably the current block row offset.
//   round_const - read-only rounding constant.
//   shift       - read-only __m128i shift count.
//   shuffle_src - read-only __m256i array of shuffle masks.
//
// NOTE(review): the body is elided in this view. Presumably this is the
// general case (alpha and beta both non-zero); units and valid ranges of the
// position parameters are not visible here -- confirm against the
// implementation.
static inline void warp_horizontal_filter_avx2(
const uint8_t *ref, __m256i *horz_out, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const __m256i *round_const, const __m128i *shift,
const __m256i *shuffle_src) { … }
// Horizontal filtering stage variant with the same parameter list as
// warp_horizontal_filter_avx2: reads 8-bit pixels from `ref` and writes
// __m256i results through `horz_out`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when alpha == 0; `alpha` is still accepted as a
// parameter, and whether it is read cannot be determined from here --
// confirm against the implementation.
static inline void warp_horizontal_filter_alpha0_avx2(
const uint8_t *ref, __m256i *horz_out, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const __m256i *round_const, const __m128i *shift,
const __m256i *shuffle_src) { … }
// Horizontal filtering stage variant with the same parameter list as
// warp_horizontal_filter_avx2: reads 8-bit pixels from `ref` and writes
// __m256i results through `horz_out`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when beta == 0; `beta` is still accepted as a
// parameter, and whether it is read cannot be determined from here --
// confirm against the implementation.
static inline void warp_horizontal_filter_beta0_avx2(
const uint8_t *ref, __m256i *horz_out, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const __m256i *round_const, const __m128i *shift,
const __m256i *shuffle_src) { … }
// Horizontal filtering stage variant with the same parameter list as
// warp_horizontal_filter_avx2: reads 8-bit pixels from `ref` and writes
// __m256i results through `horz_out`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when both alpha == 0 and beta == 0; both are still
// accepted as parameters, and whether they are read cannot be determined
// from here -- confirm against the implementation.
static inline void warp_horizontal_filter_alpha0_beta0_avx2(
const uint8_t *ref, __m256i *horz_out, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const __m256i *round_const, const __m128i *shift,
const __m256i *shuffle_src) { … }
// Produces three __m256i values through the output pointers `res_sub_const`,
// `round_bits_const` and `wt`, given the convolve parameters and the integer
// inputs `round_bits` and `offset_bits`.
// NOTE(review): the body is elided in this view. Presumably `wt` receives
// the compound-prediction weights taken from `conv_params` and the other two
// receive rounding/offset constants derived from `round_bits` and
// `offset_bits` -- confirm which ConvolveParams fields are read and the
// exact formulas against the implementation.
static inline void unpack_weights_and_set_round_const_avx2(
ConvolveParams *conv_params, const int round_bits, const int offset_bits,
__m256i *res_sub_const, __m256i *round_bits_const, __m256i *wt) { … }
// Fills the caller-provided `coeffs` array from the integer inputs `gamma`,
// `delta` and `sy`.
// NOTE(review): the body is elided in this view. Presumably this is the
// general-case vertical coefficient setup (both gamma and delta non-zero);
// how many entries of `coeffs` are written cannot be determined from here --
// confirm against the implementation.
static inline void prepare_vertical_filter_coeffs_avx2(int gamma, int delta,
int sy,
__m256i *coeffs) { … }
// Variant of the vertical coefficient setup that takes only `gamma` and `sy`
// (no `delta` parameter) and writes through `coeffs`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization for delta == 0 -- confirm against the implementation.
static inline void prepare_vertical_filter_coeffs_delta0_avx2(int gamma, int sy,
__m256i *coeffs) { … }
// Variant of the vertical coefficient setup that takes only `delta` and `sy`
// (no `gamma` parameter) and writes through `coeffs`.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization for gamma == 0, and it presumably relies on the
// shuffle_gamma0_mask*_avx2 tables -- confirm against the implementation.
static inline void prepare_vertical_filter_coeffs_gamma0_avx2(int delta, int sy,
__m256i *coeffs) { … }
// Vertical-stage helper producing two __m256i results through `res_lo` and
// `res_hi`.
//
// Parameters (from the signature):
//   horz_out - __m256i array, presumably the horizontal-stage output used as
//              input here -- TODO confirm.
//   src      - __m256i array (non-const pointer); whether it is read, written
//              or both is not visible here.
//   coeffs   - __m256i array of vertical filter coefficients.
//   res_lo   - output pointer for the low part of the result.
//   res_hi   - output pointer for the high part of the result.
//   row      - row index.
//
// NOTE(review): the body is elided in this view; the meaning of "lo"/"hi"
// (lane or element split) must be confirmed against the implementation.
static inline void filter_src_pixels_vertical_avx2(__m256i *horz_out,
__m256i *src,
__m256i *coeffs,
__m256i *res_lo,
__m256i *res_hi, int row) { … }
// Takes the vertical-stage results (`res_lo`, `res_hi`) plus rounding and
// weighting constants and stores output for position (i, j, k).
//
// Parameters (from the signature):
//   res_lo, res_hi   - read-only vertical filter results.
//   res_add_const    - read-only additive constant.
//   wt               - read-only weights vector.
//   res_sub_const    - read-only subtractive constant.
//   round_bits_const - read-only rounding constant.
//   pred             - writable 8-bit prediction buffer.
//   conv_params      - convolve parameters (non-const pointer).
//   i, j, k          - integer indices, presumably block row, block column
//                      and row within the block -- TODO confirm.
//   reduce_bits_vert - integer shift amount for the vertical stage (assumed
//                      from name).
//   p_stride         - integer stride, presumably of `pred`.
//   p_width          - integer width, presumably of the prediction area.
//   round_bits       - integer rounding shift.
//
// NOTE(review): the body is elided in this view. Whether output goes to
// `pred`, to a buffer reachable through `conv_params`, or to both depending
// on a compound-prediction mode cannot be determined from here -- confirm
// against the implementation.
static inline void store_vertical_filter_output_avx2(
const __m256i *res_lo, const __m256i *res_hi, const __m256i *res_add_const,
const __m256i *wt, const __m256i *res_sub_const,
const __m256i *round_bits_const, uint8_t *pred, ConvolveParams *conv_params,
int i, int j, int k, const int reduce_bits_vert, int p_stride, int p_width,
const int round_bits) { … }
// Vertical filtering stage of the warp, taking the __m256i array `horz_out`
// and the writable 8-bit buffer `pred`.
//
// Parameters (from the signature):
//   pred             - writable 8-bit prediction buffer.
//   horz_out         - __m256i array, presumably the horizontal-stage output.
//   conv_params      - convolve parameters (non-const pointer).
//   gamma, delta     - 16-bit warp parameters.
//   p_height         - integer height, presumably of the prediction area.
//   p_stride         - integer stride, presumably of `pred`.
//   p_width          - integer width, presumably of the prediction area.
//   i, j             - integer indices, presumably the block row/column
//                      offsets -- TODO confirm.
//   sy4              - integer sub-position input.
//   reduce_bits_vert - integer shift amount for the vertical stage (assumed
//                      from name).
//   res_add_const    - read-only additive constant.
//   round_bits       - integer rounding shift.
//   res_sub_const    - read-only subtractive constant.
//   round_bits_const - read-only rounding constant.
//   wt               - read-only weights vector.
//
// NOTE(review): the body is elided in this view. Presumably this is the
// general case (gamma and delta both non-zero) -- confirm against the
// implementation.
static inline void warp_vertical_filter_avx2(
uint8_t *pred, __m256i *horz_out, ConvolveParams *conv_params,
int16_t gamma, int16_t delta, int p_height, int p_stride, int p_width,
int i, int j, int sy4, const int reduce_bits_vert,
const __m256i *res_add_const, const int round_bits,
const __m256i *res_sub_const, const __m256i *round_bits_const,
const __m256i *wt) { … }
// Vertical filtering stage variant with the same parameter list as
// warp_vertical_filter_avx2.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when gamma == 0; `gamma` is still accepted as a
// parameter, and whether it is read cannot be determined from here --
// confirm against the implementation.
static inline void warp_vertical_filter_gamma0_avx2(
uint8_t *pred, __m256i *horz_out, ConvolveParams *conv_params,
int16_t gamma, int16_t delta, int p_height, int p_stride, int p_width,
int i, int j, int sy4, const int reduce_bits_vert,
const __m256i *res_add_const, const int round_bits,
const __m256i *res_sub_const, const __m256i *round_bits_const,
const __m256i *wt) { … }
// Vertical filtering stage variant with the same parameter list as
// warp_vertical_filter_avx2.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when delta == 0; `delta` is still accepted as a
// parameter, and whether it is read cannot be determined from here --
// confirm against the implementation.
static inline void warp_vertical_filter_delta0_avx2(
uint8_t *pred, __m256i *horz_out, ConvolveParams *conv_params,
int16_t gamma, int16_t delta, int p_height, int p_stride, int p_width,
int i, int j, int sy4, const int reduce_bits_vert,
const __m256i *res_add_const, const int round_bits,
const __m256i *res_sub_const, const __m256i *round_bits_const,
const __m256i *wt) { … }
// Vertical filtering stage variant with the same parameter list as
// warp_vertical_filter_avx2.
// NOTE(review): the body is elided in this view. The name suggests this is
// the specialization used when both gamma == 0 and delta == 0; both are
// still accepted as parameters, and whether they are read cannot be
// determined from here -- confirm against the implementation.
static inline void warp_vertical_filter_gamma0_delta0_avx2(
uint8_t *pred, __m256i *horz_out, ConvolveParams *conv_params,
int16_t gamma, int16_t delta, int p_height, int p_stride, int p_width,
int i, int j, int sy4, const int reduce_bits_vert,
const __m256i *res_add_const, const int round_bits,
const __m256i *res_sub_const, const __m256i *round_bits_const,
const __m256i *wt) { … }
// Entry point for the vertical stage; it has the same parameter list as
// warp_vertical_filter_avx2 and its gamma0/delta0 siblings.
// NOTE(review): the body is elided in this view. Presumably this inspects
// `gamma` and `delta` and dispatches to one of
// warp_vertical_filter_avx2, warp_vertical_filter_gamma0_avx2,
// warp_vertical_filter_delta0_avx2 or
// warp_vertical_filter_gamma0_delta0_avx2, forwarding all arguments --
// confirm the selection conditions against the implementation.
static inline void prepare_warp_vertical_filter_avx2(
uint8_t *pred, __m256i *horz_out, ConvolveParams *conv_params,
int16_t gamma, int16_t delta, int p_height, int p_stride, int p_width,
int i, int j, int sy4, const int reduce_bits_vert,
const __m256i *res_add_const, const int round_bits,
const __m256i *res_sub_const, const __m256i *round_bits_const,
const __m256i *wt) { … }
// Entry point for the horizontal stage; it has the same parameter list as
// warp_horizontal_filter_avx2 and its alpha0/beta0 siblings.
// NOTE(review): the body is elided in this view. Presumably this inspects
// `alpha` and `beta` and dispatches to one of
// warp_horizontal_filter_avx2, warp_horizontal_filter_alpha0_avx2,
// warp_horizontal_filter_beta0_avx2 or
// warp_horizontal_filter_alpha0_beta0_avx2, forwarding all arguments --
// confirm the selection conditions against the implementation.
static inline void prepare_warp_horizontal_filter_avx2(
const uint8_t *ref, __m256i *horz_out, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const __m256i *round_const, const __m128i *shift,
const __m256i *shuffle_src) { … }
// Externally visible (non-static) AVX2 affine-warp routine.
//
// Parameters (from the signature):
//   mat            - read-only array of 32-bit warp matrix values; the number
//                    of entries read is not visible here.
//   ref            - read-only 8-bit reference buffer.
//   width, height  - integer dimensions, presumably of `ref`.
//   stride         - integer stride, presumably of `ref`.
//   pred           - writable 8-bit prediction buffer.
//   p_col, p_row   - integer position of the predicted area (assumed from
//                    names).
//   p_width, p_height, p_stride
//                  - integer dimensions/stride, presumably of `pred`.
//   subsampling_x, subsampling_y
//                  - integer chroma subsampling inputs (assumed from names).
//   conv_params    - convolve parameters (non-const pointer).
//   alpha, beta    - 16-bit warp parameters, presumably for the horizontal
//                    stage.
//   gamma, delta   - 16-bit warp parameters, presumably for the vertical
//                    stage.
//
// NOTE(review): the body is elided in this view. Presumably it iterates over
// the prediction area in blocks, running prepare_warp_horizontal_filter_avx2
// and then prepare_warp_vertical_filter_avx2 for each block -- the block
// size, boundary handling and rounding setup must be confirmed against the
// implementation. The prototype is presumably declared in the included
// "config/av1_rtcd.h".
void av1_warp_affine_avx2(const int32_t *mat, const uint8_t *ref, int width,
int height, int stride, uint8_t *pred, int p_col,
int p_row, int p_width, int p_height, int p_stride,
int subsampling_x, int subsampling_y,
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) { … }