#include <immintrin.h>  // AVX2

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"  // DECLARE_ALIGNED
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = …;
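// Alternating {1, -1} byte pattern; with _mm256_maddubs_epi16 on interleaved
// src/ref bytes it produces the signed src - ref differences as 16-bit lanes.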
DECLARE_ALIGNED(32, static const int8_t, adjacent_sub_avx2[32]) = …;
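// Core per-register step: accumulates the sum of src - ref differences and
// the sum of squared differences for one pair of 256-bit inputs.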
static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref,
__m256i *const sse,
__m256i *const sum) { … }
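// Horizontal reductions of the vector accumulators into the scalar *sse and
// *sum outputs, for sums held in 32-bit and 16-bit lanes respectively.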
static INLINE void variance_final_from_32bit_sum_avx2(__m256i vsse,
__m128i vsum,
unsigned int *const sse,
int *const sum) { … }
static INLINE void variance_final_from_16bit_sum_avx2(__m256i vsse,
__m256i vsum,
unsigned int *const sse,
int *const sum) { … }
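// Widens 16-bit lane sums to 32 bits so taller blocks can keep accumulating
// without overflow.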
static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) { … }
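// Row kernels for 8-, 16- and 32-pixel-wide blocks: load the src/ref rows and
// accumulate their contribution into *sse and *sum.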
static INLINE void variance8_kernel_avx2(
const uint8_t *const src, const int src_stride, const uint8_t *const ref,
const int ref_stride, __m256i *const sse, __m256i *const sum) { … }
static INLINE void variance16_kernel_avx2(
const uint8_t *const src, const int src_stride, const uint8_t *const ref,
const int ref_stride, __m256i *const sse, __m256i *const sum) { … }
static INLINE void variance32_kernel_avx2(const uint8_t *const src,
const uint8_t *const ref,
__m256i *const sse,
__m256i *const sum) { … }
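// Block drivers: run the matching row kernel over h rows, leaving the vector
// accumulators in *vsse and *vsum for the caller to reduce.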
static INLINE void variance8_avx2(const uint8_t *src, const int src_stride,
const uint8_t *ref, const int ref_stride,
const int h, __m256i *const vsse,
__m256i *const vsum) { … }
static INLINE void variance16_avx2(const uint8_t *src, const int src_stride,
const uint8_t *ref, const int ref_stride,
const int h, __m256i *const vsse,
__m256i *const vsum) { … }
static INLINE void variance32_avx2(const uint8_t *src, const int src_stride,
const uint8_t *ref, const int ref_stride,
const int h, __m256i *const vsse,
__m256i *const vsum) { … }
static INLINE void variance64_avx2(const uint8_t *src, const int src_stride,
const uint8_t *ref, const int ref_stride,
const int h, __m256i *const vsse,
__m256i *const vsum) { … }
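// Exported 16x16 helper that writes both the raw SSE and the signed sum of
// differences through its output pointers.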
void vpx_get16x16var_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse, int *sum) { … }
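// Helper macros for the 32-wide sub-pixel paths: FILTER_SRC applies the
// bilinear filter (maddubs, add the rounding constant, shift), and the two
// CALC_* macros accumulate the per-iteration sums/SSEs and fold them into the
// scalar outputs.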
#define FILTER_SRC(filter) …
#define CALC_SUM_SSE_INSIDE_LOOP …
#define CALC_SUM_AND_SSE …
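// Specialized 32-wide sub-pixel variance kernels.  The x/y suffixes encode the
// sub-pixel offset handled: 0 = no filtering, 4 = half-pel (simple average),
// b = arbitrary offset (bilinear filter).  spv32_half_zero and spv32_bilin_zero
// are shared helpers for the cases where one offset is zero, with sstep
// selecting horizontal vs. vertical stepping.  When do_sec is set, the
// prediction is first averaged with second_pred (the compound path).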
static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg) { … }
static INLINE void spv32_half_zero(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred,
int second_stride, int do_sec, int height,
__m256i *sum_reg, __m256i *sse_reg,
int sstep) { … }
static INLINE void spv32_x0_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg) { … }
static INLINE void spv32_x4_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg) { … }
static INLINE void spv32_x4_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg) { … }
static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred,
int second_stride, int do_sec, int height,
__m256i *sum_reg, __m256i *sse_reg,
int offset, int sstep) { … }
static INLINE void spv32_x0_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg, int y_offset) { … }
static INLINE void spv32_xb_y0(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg, int x_offset) { … }
static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg, int y_offset) { … }
static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg, int x_offset) { … }
static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, __m256i *sum_reg,
__m256i *sse_reg, int x_offset, int y_offset) { … }
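// Dispatches to the matching spv32_* kernel for the given x/y offsets, reduces
// the accumulators into *sse and returns the signed sum.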
static INLINE int sub_pix_var32xh(const uint8_t *src, int src_stride,
int x_offset, int y_offset,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred, int second_stride,
int do_sec, int height, unsigned int *sse) { … }
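// Thin wrappers over sub_pix_var32xh for the plain and second-prediction
// averaged variants.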
static int sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
int x_offset, int y_offset,
const uint8_t *dst, int dst_stride,
int height, unsigned int *sse) { … }
static int sub_pixel_avg_variance32xh_avx2(const uint8_t *src, int src_stride,
int x_offset, int y_offset,
const uint8_t *dst, int dst_stride,
const uint8_t *second_pred,
int second_stride, int height,
unsigned int *sse) { … }
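// Function-pointer type matching the block-variance helpers such as
// vpx_get16x16var_avx2.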
typedef void (*get_var_avx2)(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum);
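// Exported variance entry points: accumulate SSE and sum for the block size,
// then return SSE - sum * sum / (w * h).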
unsigned int vpx_variance8x4_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance8x8_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance8x16_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance16x8_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance16x32_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance32x64_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
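// MSE variants: same accumulation, but only the raw SSE is returned; the sum
// of differences is not used.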
unsigned int vpx_mse16x8_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse) { … }
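// Exported sub-pixel variance entry points; the 64x64 case is split into two
// 32-wide halves handled by sub_pixel_variance32xh_avx2 and the partial
// results are combined.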
unsigned int vpx_sub_pixel_variance64x64_avx2(
const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { … }
unsigned int vpx_sub_pixel_variance32x32_avx2(
const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { … }
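// Compound-prediction counterparts: as above, but the filtered prediction is
// averaged with second_pred before the variance is computed.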
unsigned int vpx_sub_pixel_avg_variance64x64_avx2(
const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred) { … }
unsigned int vpx_sub_pixel_avg_variance32x32_avx2(
const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
const uint8_t *second_pred) { … }