#include <emmintrin.h>
#include <smmintrin.h>
#include "config/av1_rtcd.h"
#include "av1/common/warped_motion.h"
// Table of const int8_t rows: WARPEDPIXEL_PREC_SHIFTS * 3 + 1 rows of 8 entries
// each, declared through DECLARE_ALIGNED(8, ...). It is not marked `static`.
// The initializer is elided in this view.
// NOTE(review): presumably the 8-tap warp filter kernels narrowed to 8 bits
// for the SIMD horizontal pass -- confirm against the initializer.
DECLARE_ALIGNED(8, const int8_t,
av1_filter_8bit[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]) = …;
// Two file-local, const, 16-entry uint8_t tables declared through
// DECLARE_ALIGNED(16, ...). Their initializers are elided in this view.
// NOTE(review): the names suggest byte-shuffle control masks that select the
// even- and odd-positioned source bytes -- confirm against the initializers
// and the code that loads them.
DECLARE_ALIGNED(16, static const uint8_t,
even_mask[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
odd_mask[16]) = …;
// Four file-local, const, 16-entry uint8_t tables declared through
// DECLARE_ALIGNED(16, ...). Their initializers are elided in this view.
// NOTE(review): the "alpha0" / "01".."67" naming suggests shuffle masks that
// broadcast coefficient pairs (0,1), (2,3), (4,5), (6,7) of a single
// horizontal filter when alpha == 0 -- confirm against the initializers.
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_alpha0_mask01[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_alpha0_mask23[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_alpha0_mask45[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_alpha0_mask67[16]) = …;
// Four file-local, const, 16-entry uint8_t tables declared through
// DECLARE_ALIGNED(16, ...). Their initializers are elided in this view.
// NOTE(review): the "gamma0" naming suggests shuffle masks used to replicate
// parts of a single vertical filter when gamma == 0 -- confirm against the
// initializers and prepare_vertical_filter_coeffs_gamma0().
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_gamma0_mask0[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_gamma0_mask1[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_gamma0_mask2[16]) = …;
DECLARE_ALIGNED(16, static const uint8_t,
shuffle_gamma0_mask3[16]) = …;
// File-local helper. Receives one 128-bit vector `src` by value, pointers to
// __m128i arrays `tmp` and `coeff`, the two horizontal bit counts
// (`offset_bits_horiz`, `reduce_bits_horiz`) and an integer `k`.
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably filters one row of source pixels with `coeff` and
// stores the rounded result in the `tmp` slot selected by `k` -- confirm.
static inline void filter_src_pixels(__m128i src, __m128i *tmp, __m128i *coeff,
const int offset_bits_horiz,
const int reduce_bits_horiz, int k) { … }
// File-local helper taking integers `alpha` and `sx` and a pointer `coeff` to
// __m128i storage. Returns nothing; the body is elided in this view.
// NOTE(review): presumably fills `coeff` with the horizontal filter taps for
// the positions derived from `sx` and the per-pixel step `alpha` -- confirm,
// including how many __m128i entries `coeff` must provide.
static inline void prepare_horizontal_filter_coeff(int alpha, int sx,
__m128i *coeff) { … }
// File-local helper taking only `sx` and a pointer `coeff` to __m128i storage;
// unlike prepare_horizontal_filter_coeff() it has no `alpha` parameter.
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably the alpha == 0 specialisation, where a single
// filter selected by `sx` serves every pixel in the row -- confirm.
static inline void prepare_horizontal_filter_coeff_alpha0(int sx,
__m128i *coeff) { … }
// File-local helper taking a source vector `src`, a pointer `tmp` to __m128i
// storage, integers `sx`, `alpha` and `k`, and the two horizontal bit counts.
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably builds the coefficients for (`alpha`, `sx`) and
// then filters `src` into `tmp` for row `k` -- confirm against the body.
static inline void horizontal_filter(__m128i src, __m128i *tmp, int sx,
int alpha, int k,
const int offset_bits_horiz,
const int reduce_bits_horiz) { … }
// File-local horizontal-pass routine. Parameters visible in the signature:
//   ref, stride            - read-only 8-bit source pointer and an int stride
//   tmp                    - pointer to __m128i storage (non-const)
//   ix4, iy4, sx4          - 32-bit integers
//   alpha, beta            - ints
//   p_height, height, i    - ints
//   offset_bits_horiz, reduce_bits_horiz - const ints
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably the general case (alpha and beta both non-zero)
// that filters the rows needed for one block into `tmp` -- confirm the row
// range and any clamping against the body.
static inline void warp_horizontal_filter(const uint8_t *ref, __m128i *tmp,
int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta,
int p_height, int height, int i,
const int offset_bits_horiz,
const int reduce_bits_horiz) { … }
// File-local horizontal-pass variant with the same parameter list as
// warp_horizontal_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when alpha == 0; `alpha`
// is still a parameter, so check whether the body reads it.
static inline void warp_horizontal_filter_alpha0(
const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const int offset_bits_horiz, const int reduce_bits_horiz) { … }
// File-local horizontal-pass variant with the same parameter list as
// warp_horizontal_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when beta == 0; `beta` is
// still a parameter, so check whether the body reads it.
static inline void warp_horizontal_filter_beta0(
const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const int offset_bits_horiz, const int reduce_bits_horiz) { … }
// File-local horizontal-pass variant with the same parameter list as
// warp_horizontal_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when both alpha and beta
// are zero, so one set of coefficients would serve every row and column --
// confirm against the body.
static inline void warp_horizontal_filter_alpha0_beta0(
const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const int offset_bits_horiz, const int reduce_bits_horiz) { … }
// File-local helper taking the ConvolveParams pointer, the const ints
// `round_bits` and `offset_bits`, and three non-const __m128i pointers:
// `res_sub_const`, `round_bits_const` and `wt`.
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably writes the three vectors from `conv_params` and
// the bit counts (subtraction constant, rounding constant, compound weights)
// -- confirm which ConvolveParams fields are read.
static inline void unpack_weights_and_set_round_const(
ConvolveParams *conv_params, const int round_bits, const int offset_bits,
__m128i *res_sub_const, __m128i *round_bits_const, __m128i *wt) { … }
// File-local helper taking integers `gamma` and `sy` and a pointer `coeffs`
// to __m128i storage. Returns nothing; the body is elided in this view.
// NOTE(review): presumably fills `coeffs` with the vertical filter taps for
// the positions derived from `sy` and the per-pixel step `gamma` -- confirm,
// including how many __m128i entries `coeffs` must provide.
static inline void prepare_vertical_filter_coeffs(int gamma, int sy,
__m128i *coeffs) { … }
// File-local helper taking only `sy` and a pointer `coeffs` to __m128i
// storage; unlike prepare_vertical_filter_coeffs() it has no `gamma`
// parameter. Returns nothing; the body is elided in this view.
// NOTE(review): presumably the gamma == 0 specialisation, where one filter
// selected by `sy` is replicated across all columns -- confirm.
static inline void prepare_vertical_filter_coeffs_gamma0(int sy,
__m128i *coeffs) { … }
// File-local helper taking pointers `tmp` and `coeffs` to __m128i arrays, two
// non-const __m128i pointers `res_lo` and `res_hi`, and an integer `k`.
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably applies the vertical filter to the rows of `tmp`
// around `k` and writes the low/high halves of the result through `res_lo` /
// `res_hi` -- confirm against the body.
static inline void filter_src_pixels_vertical(__m128i *tmp, __m128i *coeffs,
__m128i *res_lo, __m128i *res_hi,
int k) { … }
// File-local helper. Parameters visible in the signature:
//   res_lo, res_hi        - non-const __m128i pointers
//   res_add_const, wt, res_sub_const - const __m128i pointers
//   round_bits_const      - non-const __m128i pointer
//   pred, p_stride, p_width - 8-bit destination pointer and int geometry
//   conv_params           - ConvolveParams pointer
//   i, j, k               - ints
//   reduce_bits_vert, round_bits - const ints
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably rounds the vertical-filter result and stores it to
// `pred` and/or a destination held in `conv_params`, depending on the
// compound settings -- confirm the store targets and widths in the body.
static inline void store_vertical_filter_output(
__m128i *res_lo, __m128i *res_hi, const __m128i *res_add_const,
const __m128i *wt, const __m128i *res_sub_const, __m128i *round_bits_const,
uint8_t *pred, ConvolveParams *conv_params, int i, int j, int k,
const int reduce_bits_vert, int p_stride, int p_width,
const int round_bits) { … }
// File-local vertical-pass routine. Parameters visible in the signature:
//   pred, p_stride, p_width, p_height - 8-bit destination pointer and geometry
//   tmp                   - pointer to __m128i storage (non-const)
//   conv_params           - ConvolveParams pointer
//   gamma, delta          - int16_t values
//   i, j, sy4             - ints
//   reduce_bits_vert, round_bits, offset_bits - const ints
//   res_add_const         - const __m128i pointer
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably the general case (gamma and delta both non-zero)
// that filters `tmp` vertically and stores the output rows for one block --
// confirm against the body.
static inline void warp_vertical_filter(
uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
const int round_bits, const int offset_bits) { … }
// File-local vertical-pass variant with the same parameter list as
// warp_vertical_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when gamma == 0; `gamma`
// is still a parameter, so check whether the body reads it.
static inline void warp_vertical_filter_gamma0(
uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
const int round_bits, const int offset_bits) { … }
// File-local vertical-pass variant with the same parameter list as
// warp_vertical_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when delta == 0; `delta`
// is still a parameter, so check whether the body reads it.
static inline void warp_vertical_filter_delta0(
uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
const int round_bits, const int offset_bits) { … }
// File-local vertical-pass variant with the same parameter list as
// warp_vertical_filter(). Returns nothing; the body is elided in this view.
// NOTE(review): presumably the specialisation used when both gamma and delta
// are zero, so one set of coefficients would serve the whole block -- confirm
// against the body.
static inline void warp_vertical_filter_gamma0_delta0(
uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
const int round_bits, const int offset_bits) { … }
// File-local routine with the same parameter list as warp_vertical_filter()
// and its gamma0 / delta0 / gamma0_delta0 siblings. Returns nothing; the body
// is elided in this view.
// NOTE(review): presumably a dispatcher that picks one of those four variants
// based on whether `gamma` and/or `delta` are zero -- confirm the selection
// logic against the body.
static inline void prepare_warp_vertical_filter(
uint8_t *pred, __m128i *tmp, ConvolveParams *conv_params, int16_t gamma,
int16_t delta, int p_height, int p_stride, int p_width, int i, int j,
int sy4, const int reduce_bits_vert, const __m128i *res_add_const,
const int round_bits, const int offset_bits) { … }
// File-local routine with the same parameter list as warp_horizontal_filter()
// and its alpha0 / beta0 / alpha0_beta0 siblings. Returns nothing; the body is
// elided in this view.
// NOTE(review): presumably a dispatcher that picks one of those four variants
// based on whether `alpha` and/or `beta` are zero -- confirm the selection
// logic against the body.
static inline void prepare_warp_horizontal_filter(
const uint8_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
int32_t sx4, int alpha, int beta, int p_height, int height, int i,
const int offset_bits_horiz, const int reduce_bits_horiz) { … }
// Externally visible entry point (not `static`). Parameters visible in the
// signature:
//   mat                   - read-only pointer to int32_t values
//   ref, width, height, stride - read-only 8-bit source pointer and geometry
//   pred, p_col, p_row, p_width, p_height, p_stride
//                         - 8-bit destination pointer, position and geometry
//   subsampling_x, subsampling_y - ints
//   conv_params           - ConvolveParams pointer (non-const)
//   alpha, beta, gamma, delta - int16_t values
// Returns nothing; the body is elided in this view.
// NOTE(review): presumably the SSE4.1 implementation of the affine warp
// declared via config/av1_rtcd.h, running a horizontal pass into temporary
// __m128i storage followed by a vertical pass into `pred` -- confirm the
// block tiling and boundary handling against the body.
void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
int height, int stride, uint8_t *pred, int p_col,
int p_row, int p_width, int p_height, int p_stride,
int subsampling_x, int subsampling_y,
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) { … }