#ifndef THIRD_PARTY_SVT_AV1_CONVOLVE_AVX2_H_
#define THIRD_PARTY_SVT_AV1_CONVOLVE_AVX2_H_
#include "EbMemory_AVX2.h"
#include "EbMemory_SSE4_1.h"
#include "synonyms.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/x86/convolve_avx2.h"
#include "aom_dsp/x86/mem_sse2.h"
// Takes one 128-bit coefficient vector and writes to the caller's coeffs[2]
// array of 256-bit vectors. Presumably spreads the 4-tap coefficients, one
// pair per register; the body is not visible here, so confirm the layout.
static inline void populate_coeffs_4tap_avx2(const __m128i coeffs_128,
__m256i coeffs[2]) { … }
// Takes one 128-bit coefficient vector and writes to the caller's coeffs[3]
// array of 256-bit vectors. Presumably spreads the 6-tap coefficients, one
// pair per register; the body is not visible here, so confirm the layout.
static inline void populate_coeffs_6tap_avx2(const __m128i coeffs_128,
__m256i coeffs[3]) { … }
// Takes one 128-bit coefficient vector and writes to the caller's coeffs[4]
// array of 256-bit vectors. Presumably spreads the 8-tap coefficients, one
// pair per register; the body is not visible here, so confirm the layout.
static inline void populate_coeffs_8tap_avx2(const __m128i coeffs_128,
__m256i coeffs[4]) { … }
// Builds 128-bit 2-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_2tap_ssse3(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 4-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_4tap_ssse3(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 6-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_6tap_ssse3(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 8-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_8tap_ssse3(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// 256-bit variant: builds 2-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_2tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 4-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_4tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 6-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_6tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 8-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): "half" presumably means the taps are stored halved; the body
// is not visible here, so confirm the scaling and how many vectors are written.
static inline void prepare_half_coeffs_8tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// Builds 128-bit 2-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs. Presumably keeps the
// taps at full scale (contrast with the prepare_half_coeffs_* helpers); the
// body is not visible here — confirm element width and vector count.
static inline void prepare_coeffs_2tap_sse2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 4-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs. Presumably keeps the
// taps at full scale (contrast with the prepare_half_coeffs_* helpers); the
// body is not visible here — confirm element width and vector count.
static inline void prepare_coeffs_4tap_sse2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 6-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs.
// NOTE(review): this is the only 128-bit prepare_coeffs_* helper suffixed
// _ssse3 rather than _sse2; presumably the body needs an SSSE3 instruction —
// confirm against the (not visible) implementation.
static inline void prepare_coeffs_6tap_ssse3(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// Builds 128-bit 8-tap coefficient vector(s) from filter_params and the
// sub-pixel position subpel_q4, writing through coeffs. Presumably keeps the
// taps at full scale (contrast with the prepare_half_coeffs_* helpers); the
// body is not visible here — confirm element width and vector count.
static inline void prepare_coeffs_8tap_sse2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m128i *const coeffs ) { … }
// 256-bit variant: builds 2-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs. The body is not
// visible here — confirm element width and how many vectors are written.
static inline void prepare_coeffs_2tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 4-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs. The body is not
// visible here — confirm element width and how many vectors are written.
static inline void prepare_coeffs_4tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 6-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs. The body is not
// visible here — confirm element width and how many vectors are written.
static inline void prepare_coeffs_6tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// 256-bit variant: builds 8-tap coefficient vector(s) from filter_params and
// the sub-pixel position subpel_q4, writing through coeffs. The body is not
// visible here — confirm element width and how many vectors are written.
static inline void prepare_coeffs_8tap_avx2(
const InterpFilterParams *const filter_params, const int32_t subpel_q4,
__m256i *const coeffs ) { … }
// Presumably loads five rows of int16_t data starting at src, `stride` apart,
// into dst[0..4]. Body not visible: confirm whether stride counts elements or
// bytes and whether the loads require 32-byte alignment.
static inline void load_16bit_5rows_avx2(const int16_t *const src,
const ptrdiff_t stride,
__m256i dst[5]) { … }
// Presumably loads seven rows of int16_t data starting at src, `stride` apart,
// into dst[0..6]. Body not visible: confirm whether stride counts elements or
// bytes and whether the loads require 32-byte alignment.
static inline void load_16bit_7rows_avx2(const int16_t *const src,
const ptrdiff_t stride,
__m256i dst[7]) { … }
// Presumably loads eight rows of int16_t data starting at src, `stride` apart,
// into dst[0..7]. Unlike the 5- and 7-row loaders this one is declared
// AOM_FORCE_INLINE. Body not visible: confirm stride units and alignment.
static AOM_FORCE_INLINE void load_16bit_8rows_avx2(const int16_t *const src,
const ptrdiff_t stride,
__m256i dst[8]) { … }
// Presumably does unaligned loads of five int16_t rows from src into s_256 and
// interleaves adjacent rows into ss_256 / tt_256 (likely low and high halves).
// Body not visible: confirm which array slots are actually written.
static AOM_FORCE_INLINE void loadu_unpack_16bit_5rows_avx2(
const int16_t *const src, const ptrdiff_t stride, __m256i s_256[5],
__m256i ss_256[5], __m256i tt_256[5]) { … }
// Presumably does unaligned loads of three int16_t rows from src into s_256
// and interleaves adjacent rows into ss_256 / tt_256 (likely low and high
// halves). Body not visible: confirm which array slots are actually written.
static AOM_FORCE_INLINE void loadu_unpack_16bit_3rows_avx2(
const int16_t *const src, const ptrdiff_t stride, __m256i s_256[3],
__m256i ss_256[3], __m256i tt_256[3]) { … }
// Reads the row vectors in s[] and writes interleaved vectors to ss[];
// presumably prepares operands for an 8-tap vertical filter.
// NOTE(review): the declared extents (s[6] in, ss[7] out) look unusual for an
// 8-tap helper — verify them against the indices used in the body.
static inline void convolve_8tap_unpack_avx2(const __m256i s[6],
__m256i ss[7]) { … }
// Returns a 128-bit result computed from one source vector ss[0] and one
// coefficient vector coeffs[0]. Presumably a 2-tap multiply-accumulate on
// 8-bit sample pairs; body not visible — confirm operand layout.
static inline __m128i convolve_2tap_ssse3(const __m128i ss[1],
const __m128i coeffs[1]) { … }
// Returns a 128-bit result computed from ss[0..1] and coeffs[0..1].
// Presumably a 4-tap multiply-accumulate on 8-bit sample pairs, summing the
// two partial products; body not visible — confirm operand layout.
static inline __m128i convolve_4tap_ssse3(const __m128i ss[2],
const __m128i coeffs[2]) { … }
// Returns a 128-bit result computed from ss[0..2] and coeffs[0..2].
// Presumably a 6-tap multiply-accumulate on 8-bit sample pairs, summing the
// three partial products; body not visible — confirm operand layout.
static inline __m128i convolve_6tap_ssse3(const __m128i ss[3],
const __m128i coeffs[3]) { … }
// Returns a 128-bit result computed from ss[0..3] and coeffs[0..3].
// Presumably an 8-tap multiply-accumulate on 8-bit sample pairs, summing the
// four partial products; body not visible — confirm operand layout.
static inline __m128i convolve_8tap_ssse3(const __m128i ss[4],
const __m128i coeffs[4]) { … }
// 256-bit counterpart of the 2-tap kernel: returns a result computed from
// ss[0] and coeffs[0]. Presumably a multiply-accumulate on 8-bit sample pairs;
// body not visible — confirm operand layout.
static inline __m256i convolve_2tap_avx2(const __m256i ss[1],
const __m256i coeffs[1]) { … }
// 256-bit counterpart of the 4-tap kernel: returns a result computed from
// ss[0..1] and coeffs[0..1]. Presumably sums two pairwise multiply-accumulate
// products; body not visible — confirm operand layout.
static inline __m256i convolve_4tap_avx2(const __m256i ss[2],
const __m256i coeffs[2]) { … }
// 256-bit counterpart of the 6-tap kernel: returns a result computed from
// ss[0..2] and coeffs[0..2]. Presumably sums three pairwise
// multiply-accumulate products; body not visible — confirm operand layout.
static inline __m256i convolve_6tap_avx2(const __m256i ss[3],
const __m256i coeffs[3]) { … }
// 256-bit counterpart of the 8-tap kernel: returns a result computed from
// ss[0..3] and coeffs[0..3]. Presumably sums four pairwise multiply-accumulate
// products; body not visible — confirm operand layout.
static inline __m256i convolve_8tap_avx2(const __m256i ss[4],
const __m256i coeffs[4]) { … }
// Returns a 128-bit result from ss[0] and coeffs[0]. The "16" presumably means
// the inputs are 16-bit samples (giving wider sums), as opposed to the 8-bit
// convolve_*tap_* kernels; body not visible — confirm.
static inline __m128i convolve16_2tap_sse2(const __m128i ss[1],
const __m128i coeffs[1]) { … }
// Returns a 128-bit result from ss[0..1] and coeffs[0..1]. The "16" presumably
// means 16-bit input samples; the two partial products are presumably summed.
// Body not visible — confirm.
static inline __m128i convolve16_4tap_sse2(const __m128i ss[2],
const __m128i coeffs[2]) { … }
// Returns a 128-bit result from ss[0..2] and coeffs[0..2]. The "16" presumably
// means 16-bit input samples; the three partial products are presumably
// summed. Body not visible — confirm.
static inline __m128i convolve16_6tap_sse2(const __m128i ss[3],
const __m128i coeffs[3]) { … }
// Returns a 128-bit result from ss[0..3] and coeffs[0..3]. The "16" presumably
// means 16-bit input samples; the four partial products are presumably summed.
// Body not visible — confirm.
static inline __m128i convolve16_8tap_sse2(const __m128i ss[4],
const __m128i coeffs[4]) { … }
// 256-bit variant: returns a result from ss[0] and coeffs[0]. The "16"
// presumably means 16-bit input samples. Body not visible — confirm.
static inline __m256i convolve16_2tap_avx2(const __m256i ss[1],
const __m256i coeffs[1]) { … }
// 256-bit variant: returns a result from ss[0..1] and coeffs[0..1]. The "16"
// presumably means 16-bit input samples. Body not visible — confirm.
static inline __m256i convolve16_4tap_avx2(const __m256i ss[2],
const __m256i coeffs[2]) { … }
// 256-bit variant: returns a result from ss[0..2] and coeffs[0..2]. The "16"
// presumably means 16-bit input samples. Body not visible — confirm.
static inline __m256i convolve16_6tap_avx2(const __m256i ss[3],
const __m256i coeffs[3]) { … }
// 256-bit variant: returns a result from ss[0..3] and coeffs[0..3]. The "16"
// presumably means 16-bit input samples. Body not visible — confirm.
static inline __m256i convolve16_8tap_avx2(const __m256i ss[4],
const __m256i coeffs[4]) { … }
// Horizontal 4-tap kernel on an already-loaded 256-bit `data` vector.
// Presumably filt[0..1] are byte-shuffle masks that gather neighbouring pixels
// before the coefficients are applied; body not visible — confirm.
static inline __m256i x_convolve_4tap_avx2(const __m256i data,
const __m256i coeffs[2],
const __m256i filt[2]) { … }
// Horizontal 6-tap kernel on an already-loaded 256-bit `data` vector.
// Presumably filt[0..2] are byte-shuffle masks that gather neighbouring pixels
// before the coefficients are applied; body not visible — confirm.
static inline __m256i x_convolve_6tap_avx2(const __m256i data,
const __m256i coeffs[3],
const __m256i filt[3]) { … }
// Horizontal 8-tap kernel on an already-loaded 256-bit `data` vector.
// Presumably filt[0..3] are byte-shuffle masks that gather neighbouring pixels
// before the coefficients are applied; body not visible — confirm.
static inline __m256i x_convolve_8tap_avx2(const __m256i data,
const __m256i coeffs[4],
const __m256i filt[4]) { … }
// Returns a rounded version of `src`; presumably the rounding step of the
// single-reference vertical path. The rounding constant and shift are in the
// (not visible) body — confirm.
static inline __m256i sr_y_round_avx2(const __m256i src) { … }
// Returns a rounded version of `src`; presumably the rounding applied after
// the horizontal pass of the 2-D (xy) path. Constant and shift are in the (not
// visible) body — confirm.
static inline __m128i xy_x_round_sse2(const __m128i src) { … }
// 256-bit variant: returns a rounded version of `src`; presumably the rounding
// applied after the horizontal pass of the 2-D (xy) path. Constant and shift
// are in the (not visible) body — confirm.
static inline __m256i xy_x_round_avx2(const __m256i src) { … }
// Presumably rounds `res` and stores a 2-wide by 2-row tile of int16_t
// intermediates at dst. Body not visible: confirm the row pitch used within
// dst and the number of bytes written.
static inline void xy_x_round_store_2x2_sse2(const __m128i res,
int16_t *const dst) { … }
// Presumably rounds `res` and stores a 4-wide by 2-row tile of int16_t
// intermediates at dst. Body not visible: confirm the row pitch used within
// dst and the number of bytes written.
static inline void xy_x_round_store_4x2_sse2(const __m128i res,
int16_t *const dst) { … }
// Presumably rounds res[0..1] (one vector per row) and stores an 8-wide by
// 2-row tile of int16_t intermediates at dst. Body not visible: confirm the
// row pitch and any alignment requirement on dst.
static inline void xy_x_round_store_8x2_sse2(const __m128i res[2],
int16_t *const dst) { … }
// Presumably rounds the single 256-bit `res` (both rows in one register) and
// stores an 8-wide by 2-row tile of int16_t intermediates at dst. Body not
// visible: confirm lane-to-row mapping and alignment requirement on dst.
static inline void xy_x_round_store_8x2_avx2(const __m256i res,
int16_t *const dst) { … }
// Presumably rounds res[0..1] and stores one row of 32 int16_t intermediates
// at dst. Body not visible: confirm whether lanes are permuted before the
// store and any alignment requirement on dst.
static inline void xy_x_round_store_32_avx2(const __m256i res[2],
int16_t *const dst) { … }
// Returns a rounded version of `src`; presumably the rounding applied after
// the vertical pass of the 2-D (xy) path. Constant and shift are in the (not
// visible) body — confirm.
static inline __m128i xy_y_round_sse2(const __m128i src) { … }
// Returns a rounded version of `src`; presumably the vertical-pass rounding
// specialised for the half-pixel phase (different constant/shift than
// xy_y_round_sse2). Body not visible — confirm.
static inline __m128i xy_y_round_half_pel_sse2(const __m128i src) { … }
// 256-bit variant: returns a rounded version of `src`; presumably the rounding
// applied after the vertical pass of the 2-D (xy) path. Body not visible —
// confirm constant and shift.
static inline __m256i xy_y_round_avx2(const __m256i src) { … }
// Combines the two vectors r[0..1] into one returned 256-bit vector;
// presumably rounds each and packs them to 16 narrower results. Body not
// visible — confirm rounding and packing order.
static inline __m256i xy_y_round_16_avx2(const __m256i r[2]) { … }
// 256-bit variant: returns a rounded version of `src`; presumably the
// vertical-pass rounding specialised for the half-pixel phase. Body not
// visible — confirm constant and shift.
static inline __m256i xy_y_round_half_pel_avx2(const __m256i src) { … }
// Presumably packs the values in `res` to unsigned 8-bit and stores a 2x2
// pixel tile at dst, with the second row at dst + stride. Body not visible:
// confirm saturation behaviour and exact bytes written.
static inline void pack_store_2x2_sse2(const __m128i res, uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably packs the values in `res` to unsigned 8-bit and stores a 4x2
// pixel tile at dst, with the second row at dst + stride. Body not visible:
// confirm saturation behaviour and exact bytes written.
static inline void pack_store_4x2_sse2(const __m128i res, uint8_t *const dst,
const ptrdiff_t stride) { … }
// 256-bit-input variant: presumably packs `res` to unsigned 8-bit and stores a
// 4x2 pixel tile at dst, with the second row at dst + stride. Body not
// visible: confirm which lanes feed which row.
static inline void pack_store_4x2_avx2(const __m256i res, uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably packs `res` to unsigned 8-bit and stores an 8x2 pixel tile at
// dst, with the second row at dst + stride. Body not visible: confirm which
// lanes feed which row.
static inline void pack_store_8x2_avx2(const __m256i res, uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably packs res0 and res1 together to unsigned 8-bit and stores a 16x2
// pixel tile at dst, with the second row at dst + stride. Body not visible:
// confirm how res0/res1 map to rows and lanes.
static inline void pack_store_16x2_avx2(const __m256i res0, const __m256i res1,
uint8_t *const dst,
const ptrdiff_t stride) { … }
// 2-D (xy) path variant of the 16x2 pack-and-store: presumably packs res0 and
// res1 to unsigned 8-bit and writes two 16-pixel rows at dst and dst + stride.
// Body not visible: confirm how it differs from pack_store_16x2_avx2 (likely
// lane ordering).
static inline void xy_y_pack_store_16x2_avx2(const __m256i res0,
const __m256i res1,
uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably packs res0 and res1 to unsigned 8-bit and stores one row of 32
// pixels at dst. Body not visible: confirm whether a cross-lane permute is
// applied and whether dst must be aligned.
static inline void pack_store_32_avx2(const __m256i res0, const __m256i res1,
uint8_t *const dst) { … }
// Presumably applies the xy vertical-pass rounding to `res`, packs to 8-bit
// and stores a 2x2 pixel tile at dst / dst + stride. Body not visible —
// confirm.
static inline void xy_y_round_store_2x2_sse2(const __m128i res,
uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably applies the xy vertical-pass rounding to `res`, packs to 8-bit
// and stores a 4x2 pixel tile at dst / dst + stride. Body not visible —
// confirm.
static inline void xy_y_round_store_4x2_avx2(const __m256i res,
uint8_t *const dst,
const ptrdiff_t stride) { … }
// 2-D (xy) path variant of the 32-pixel pack-and-store: presumably packs res0
// and res1 to unsigned 8-bit and writes one 32-pixel row at dst. Body not
// visible: confirm how it differs from pack_store_32_avx2 (likely lane
// ordering).
static inline void xy_y_pack_store_32_avx2(const __m256i res0,
const __m256i res1,
uint8_t *const dst) { … }
// Takes two pairs of vectors (r0[0..1], r1[0..1]); presumably rounds and
// narrows them and stores one row of 32 pixels at dst. Body not visible:
// confirm which pair covers which 16 pixels.
static inline void xy_y_round_store_32_avx2(const __m256i r0[2],
const __m256i r1[2],
uint8_t *const dst) { … }
// Presumably packs res0 and res1 to unsigned 8-bit and stores 32 pixels at
// dst. Body not visible: confirm how this differs from pack_store_32_avx2
// (e.g. lane permutation).
static inline void convolve_store_32_avx2(const __m256i res0,
const __m256i res1,
uint8_t *const dst) { … }
// Returns a rounded version of `src`; presumably the rounding step of the
// single-reference horizontal path. Constant and shift are in the (not
// visible) body — confirm.
static inline __m128i sr_x_round_sse2(const __m128i src) { … }
// 256-bit variant: returns a rounded version of `src`; presumably the rounding
// step of the single-reference horizontal path. Body not visible — confirm
// constant and shift.
static inline __m256i sr_x_round_avx2(const __m256i src) { … }
// Returns a rounded version of `src`; presumably the rounding step of the
// single-reference vertical path (128-bit counterpart of sr_y_round_avx2).
// Body not visible — confirm constant and shift.
static inline __m128i sr_y_round_sse2(const __m128i src) { … }
// Presumably applies the single-reference horizontal rounding to `res`, packs
// to 8-bit and stores an 8x2 pixel tile at dst / dst + dst_stride. Body not
// visible — confirm.
static inline void sr_x_round_store_8x2_avx2(const __m256i res,
uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// Presumably applies the single-reference horizontal rounding to res[0..1],
// packs to 8-bit and stores a 16x2 pixel tile at dst / dst + dst_stride. Body
// not visible — confirm.
static inline void sr_x_round_store_16x2_avx2(const __m256i res[2],
uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// Presumably applies the single-reference horizontal rounding to res[0..1],
// packs to 8-bit and stores one row of 32 pixels at dst. Body not visible —
// confirm.
static inline void sr_x_round_store_32_avx2(const __m256i res[2],
uint8_t *const dst) { … }
// Presumably applies the single-reference vertical rounding to `res`, packs to
// 8-bit and stores an 8x2 pixel tile at dst / dst + dst_stride. Body not
// visible — confirm.
static inline void sr_y_round_store_8x2_avx2(const __m256i res,
uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// Presumably applies the single-reference vertical rounding to res[0..1],
// packs to 8-bit and stores a 16x2 pixel tile at dst / dst + dst_stride. Body
// not visible — confirm.
static inline void sr_y_round_store_16x2_avx2(const __m256i res[2],
uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// Vertical 2-tap "avg" step for 32 pixels: takes the previous row vector s0,
// writes through s1 and stores to dst. Presumably loads the next row from src
// into *s1 and writes the byte-wise average of the two rows (the equal-weight
// 2-tap case). Body not visible — confirm.
static inline void sr_y_2tap_32_avg_avx2(const uint8_t *const src,
const __m256i s0, __m256i *const s1,
uint8_t *const dst) { … }
// Horizontal 2-tap "avg" step for 32 pixels. Presumably averages src[i] with
// src[i + 1] and writes 32 bytes to dst, which would read 33 source bytes.
// Body not visible — confirm read extent.
static inline void sr_x_2tap_32_avg_avx2(const uint8_t *const src,
uint8_t *const dst) { … }
// Horizontal 2-tap filter over a 2-wide, 2-row tile read from src and
// src + stride; returns the unrounded sums. The _sse4_1 suffix suggests an
// SSE4.1 instruction is used in the (not visible) body — confirm.
static inline __m128i x_convolve_2tap_2x2_sse4_1(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[1]) { … }
// Horizontal 2-tap filter over a 4-wide, 2-row tile read from src and
// src + stride; presumably returns the unrounded sums for both rows in one
// register. Body not visible — confirm.
static inline __m128i x_convolve_2tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[1]) { … }
// Horizontal 2-tap filter over an 8-wide, 2-row tile read from src and
// src + stride; results go to r[0..1], presumably one register per row. Body
// not visible — confirm.
static inline void x_convolve_2tap_8x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[1],
__m128i r[2]) { … }
// AVX2 horizontal 2-tap filter over an 8-wide, 2-row tile read from src and
// src + stride; presumably returns both rows in one 256-bit register (one row
// per 128-bit lane). Body not visible — confirm.
static inline __m256i x_convolve_2tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[1]) { … }
// AVX2 horizontal 2-tap filter over a 16-wide, 2-row tile read from src and
// src + stride; results go to r[0..1]. Body not visible — confirm how rows and
// column halves are distributed over r[].
static inline void x_convolve_2tap_16x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[1],
__m256i r[2]) { … }
// AVX2 horizontal 2-tap filter over one row of 32 pixels starting at src;
// results go to r[0..1]. Body not visible — confirm the pixel-to-register
// mapping and how far past src + 31 is read.
static inline void x_convolve_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1],
__m256i r[2]) { … }
// Horizontal 4-tap filter over a 2-wide, 2-row tile read from src and
// src + stride; returns the unrounded sums. Body not visible — confirm whether
// src already points at the first tap or at the output pixel.
static inline __m128i x_convolve_4tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[2]) { … }
// Horizontal 4-tap filter over a 4-wide, 2-row tile read from src and
// src + stride; returns the unrounded sums. Body not visible — confirm whether
// src already points at the first tap or at the output pixel.
static inline __m128i x_convolve_4tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[2]) { … }
// AVX2 horizontal 4-tap filter over an 8-wide, 2-row tile read from src and
// src + stride. Presumably loads both rows into one register and defers to the
// data-level 4-tap kernel with the filt[] shuffle masks. Body not visible —
// confirm.
static inline __m256i x_convolve_4tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[2],
const __m256i filt[2]) { … }
// AVX2 horizontal 4-tap filter over a 16-wide, 2-row tile; results go to
// r[0..1]. Body not visible — confirm the row/column mapping of r[].
// NOTE(review): the stride parameter here is `int32_t src_stride`, whereas the
// 8x2 sibling above takes `ptrdiff_t stride`; consider unifying on ptrdiff_t.
static inline void x_convolve_4tap_16x2_avx2(const uint8_t *const src,
const int32_t src_stride,
const __m256i coeffs[2],
const __m256i filt[2],
__m256i r[2]) { … }
// AVX2 horizontal 4-tap filter over one row of 32 pixels starting at src;
// results go to r[0..1]. Body not visible — confirm the pixel-to-register
// mapping and the number of source bytes read.
static inline void x_convolve_4tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[2],
const __m256i filt[2],
__m256i r[2]) { … }
// Horizontal 6-tap filter over a 2-wide, 2-row tile read from src and
// src + stride; returns the unrounded sums. Body not visible — confirm whether
// src already points at the first tap or at the output pixel.
static inline __m128i x_convolve_6tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[3]) { … }
// Horizontal 6-tap filter over a 4-wide, 2-row tile read from src and
// src + stride; returns the unrounded sums. Body not visible — confirm whether
// src already points at the first tap or at the output pixel.
static inline __m128i x_convolve_6tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[3]) { … }
// AVX2 horizontal 6-tap filter over an 8-wide, 2-row tile read from src and
// src + stride. Presumably loads both rows into one register and defers to the
// data-level 6-tap kernel with the filt[] shuffle masks. Body not visible —
// confirm.
static inline __m256i x_convolve_6tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[3],
const __m256i filt[3]) { … }
// AVX2 horizontal 6-tap filter over a 16-wide, 2-row tile; results go to
// r[0..1]. Body not visible — confirm the row/column mapping of r[].
// NOTE(review): the stride parameter here is `int32_t src_stride`, whereas the
// 8x2 sibling above takes `ptrdiff_t stride`; consider unifying on ptrdiff_t.
static inline void x_convolve_6tap_16x2_avx2(const uint8_t *const src,
const int32_t src_stride,
const __m256i coeffs[3],
const __m256i filt[3],
__m256i r[2]) { … }
// AVX2 horizontal 6-tap filter over one row of 32 pixels starting at src;
// results go to r[0..1]. Body not visible — confirm the pixel-to-register
// mapping and the number of source bytes read.
static inline void x_convolve_6tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[3],
const __m256i filt[3],
__m256i r[2]) { … }
// AVX2 horizontal 8-tap filter over an 8-wide, 2-row tile read from src and
// src + stride. Presumably loads both rows into one register and defers to the
// data-level 8-tap kernel with the filt[] shuffle masks. Body not visible —
// confirm.
static inline __m256i x_convolve_8tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[4],
const __m256i filt[4]) { … }
// AVX2 horizontal 8-tap filter over a 16-wide, 2-row tile; results go to
// r[0..1]. Declared AOM_FORCE_INLINE. Body not visible — confirm the
// row/column mapping of r[].
// NOTE(review): the stride parameter here is `int32_t src_stride`, whereas the
// 8x2 sibling above takes `ptrdiff_t stride`; consider unifying on ptrdiff_t.
static AOM_FORCE_INLINE void x_convolve_8tap_16x2_avx2(const uint8_t *const src,
const int32_t src_stride,
const __m256i coeffs[4],
const __m256i filt[4],
__m256i r[2]) { … }
// AVX2 horizontal 8-tap filter over one row of 32 pixels starting at src;
// results go to r[0..1]. Declared AOM_FORCE_INLINE. Body not visible — confirm
// the pixel-to-register mapping and the number of source bytes read.
static AOM_FORCE_INLINE void x_convolve_8tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[4],
const __m256i filt[4],
__m256i r[2]) { … }
// Vertical 2-tap filter producing a 2x2 tile. s_16[] is a caller-owned row
// cache (the name suggests 16 bits = 2 pixels per entry); presumably the
// function loads new rows from src into it so consecutive calls can reuse
// rows. Body not visible — confirm which entries are read vs. written.
static inline __m128i y_convolve_2tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[1],
__m128i s_16[2]) { … }
// Vertical 2-tap filter producing a 4x2 tile. s_32[] is a caller-owned row
// cache (the name suggests 32 bits = 4 pixels per entry); presumably new rows
// are loaded from src into it. Body not visible — confirm which entries are
// read vs. written.
static inline __m128i y_convolve_2tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[1],
__m128i s_32[2]) { … }
// AVX2 vertical 2-tap filter producing an 8x2 tile. s_64[] is a caller-owned
// row cache (the name suggests 64 bits = 8 pixels per entry); presumably new
// rows are loaded from src into it. Body not visible — confirm which entries
// are read vs. written.
static inline __m256i y_convolve_2tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[1],
__m128i s_64[2]) { … }
// AVX2 vertical 2-tap filter producing a 16x2 tile into r[0..1]. s_128[] is a
// caller-owned cache of 16-pixel rows; presumably new rows are loaded from src
// into it. Body not visible — confirm which entries are read vs. written.
static inline void y_convolve_2tap_16x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[1],
__m128i s_128[2], __m256i r[2]) { … }
// AVX2 vertical 2-tap filter for one row of 32 pixels: takes the previous row
// s0, writes through s1 (presumably the row newly loaded from src) and puts
// the sums in r[0..1]. Body not visible — confirm.
static inline void y_convolve_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1],
const __m256i s0, __m256i *const s1,
__m256i r[2]) { … }
// Vertical 4-tap filter producing a 2x2 tile. s_16[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src for reuse by the next call. Body not visible
// — confirm which entries are read vs. written.
static inline __m128i y_convolve_4tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[2],
__m128i s_16[4],
__m128i ss_128[2]) { … }
// Vertical 4-tap filter producing a 4x2 tile. s_32[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src. Body not visible — confirm which entries are
// read vs. written.
static inline __m128i y_convolve_4tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[2],
__m128i s_32[4],
__m128i ss_128[2]) { … }
// AVX2 vertical 4-tap filter producing an 8x2 tile. s_64[] (row cache) and
// ss_256[] (presumably interleaved row pairs) are caller-owned state that the
// function is expected to update from src. Body not visible — confirm.
static inline __m256i y_convolve_4tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[2],
__m128i s_64[4],
__m256i ss_256[2]) { … }
// AVX2 vertical 4-tap filter producing a 16x2 tile into r[0..1]. s_128[] (row
// cache) and ss_256[] (presumably interleaved row pairs, low and high halves)
// are caller-owned state updated from src. Body not visible — confirm.
static inline void y_convolve_4tap_16x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[2],
__m128i s_128[4],
__m256i ss_256[4], __m256i r[2]) { … }
// Vertical 6-tap filter producing a 2x2 tile. s_16[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src. Body not visible — confirm which entries are
// read vs. written.
static inline __m128i y_convolve_6tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[3],
__m128i s_16[6],
__m128i ss_128[3]) { … }
// AVX2 vertical 4-tap filter producing a 32x2 tile into r[0..3]. s_256[] (row
// cache), ss_256[] and tt_256[] (presumably interleaved row pairs for the two
// output rows) are caller-owned state updated from src. Body not visible —
// confirm.
static inline void y_convolve_4tap_32x2_avx2(
const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[2],
__m256i s_256[4], __m256i ss_256[4], __m256i tt_256[4], __m256i r[4]) { … }
// Vertical 6-tap filter producing a 4x2 tile. s_32[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src. Body not visible — confirm.
static inline __m128i y_convolve_6tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[3],
__m128i s_32[6],
__m128i ss_128[3]) { … }
// AVX2 vertical 6-tap filter producing an 8x2 tile. s_64[] (row cache) and
// ss_256[] (presumably interleaved row pairs) are caller-owned state that the
// function is expected to update from src. Body not visible — confirm.
static inline __m256i y_convolve_6tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[3],
__m128i s_64[6],
__m256i ss_256[3]) { … }
// AVX2 vertical 6-tap filter producing a 16x2 tile into r[0..1]. s_128[] (row
// cache) and ss_256[] (presumably interleaved row pairs, low and high halves)
// are caller-owned state updated from src. Body not visible — confirm.
static inline void y_convolve_6tap_16x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[3],
__m128i s_128[6],
__m256i ss_256[6], __m256i r[2]) { … }
// AVX2 vertical 6-tap filter producing a 32x2 tile into r[0..3]. s_256[] (row
// cache), ss_256[] and tt_256[] (presumably interleaved row pairs for the two
// output rows) are caller-owned state updated from src. Body not visible —
// confirm.
static inline void y_convolve_6tap_32x2_avx2(
const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[3],
__m256i s_256[6], __m256i ss_256[6], __m256i tt_256[6], __m256i r[4]) { … }
// Vertical 8-tap filter producing a 2x2 tile. s_16[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src. Body not visible — confirm.
static inline __m128i y_convolve_8tap_2x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[4],
__m128i s_16[8],
__m128i ss_128[4]) { … }
// Vertical 8-tap filter producing a 4x2 tile. s_32[] (row cache) and ss_128[]
// (presumably interleaved row pairs) are caller-owned state that the function
// is expected to update from src. Body not visible — confirm.
static inline __m128i y_convolve_8tap_4x2_ssse3(const uint8_t *const src,
const ptrdiff_t stride,
const __m128i coeffs[4],
__m128i s_32[8],
__m128i ss_128[4]) { … }
// AVX2 vertical 8-tap filter producing an 8x2 tile. s_64[] (row cache) and
// ss_256[] (presumably interleaved row pairs) are caller-owned state that the
// function is expected to update from src. Body not visible — confirm.
static inline __m256i y_convolve_8tap_8x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[4],
__m128i s_64[8],
__m256i ss_256[4]) { … }
// AVX2 vertical 8-tap filter producing a 16x2 tile into r[0..1]. s_128[] (row
// cache) and ss_256[] (presumably interleaved row pairs, low and high halves)
// are caller-owned state updated from src. Body not visible — confirm.
static inline void y_convolve_8tap_16x2_avx2(const uint8_t *const src,
const ptrdiff_t stride,
const __m256i coeffs[4],
__m128i s_128[8],
__m256i ss_256[8], __m256i r[2]) { … }
// AVX2 vertical 8-tap filter producing a 32x2 tile into r[0..3]. s_256[] (row
// cache), ss_256[] and tt_256[] (presumably interleaved row pairs for the two
// output rows) are caller-owned state updated from src. Body not visible —
// confirm.
static inline void y_convolve_8tap_32x2_avx2(
const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
__m256i s_256[8], __m256i ss_256[8], __m256i tt_256[8], __m256i r[4]) { … }
// Horizontal 2-tap filter for one row of 32 pixels in the 2-D (xy) path;
// results go to r[0..1]. Body not visible — confirm how the register layout
// differs from x_convolve_2tap_32_avx2, which has the same signature.
static inline void xy_x_convolve_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1],
__m256i r[2]) { … }
// Horizontal pass of the 2-D path for 32 pixels with a 2-tap filter: reads
// 8-bit pixels from src and writes int16_t intermediates to dst. Presumably
// convolve + round + store; body not visible — confirm.
static inline void xy_x_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1],
int16_t *const dst) { … }
// Horizontal pass of the 2-D path for 32 pixels with a 4-tap filter: reads
// 8-bit pixels from src and writes int16_t intermediates to dst. Presumably
// convolve + round + store; body not visible — confirm.
static inline void xy_x_4tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[2],
const __m256i filt[2],
int16_t *const dst) { … }
// Horizontal pass of the 2-D path for 32 pixels with a 6-tap filter: reads
// 8-bit pixels from src and writes int16_t intermediates to dst. Presumably
// convolve + round + store; body not visible — confirm.
static inline void xy_x_6tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[3],
const __m256i filt[3],
int16_t *const dst) { … }
// Horizontal pass of the 2-D path for 32 pixels with an 8-tap filter: reads
// 8-bit pixels from src and writes int16_t intermediates to dst. Presumably
// convolve + round + store; body not visible — confirm.
static inline void xy_x_8tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[4],
const __m256i filt[4],
int16_t *const dst) { … }
// Vertical pass of the 2-D path, 2-tap, 2x2 tile: reads int16_t intermediates
// from src. s_32[] is a caller-owned row cache presumably refreshed here. No
// stride parameter, so the intermediate row pitch is presumably fixed by the
// tile width. Body not visible — confirm.
static inline __m128i xy_y_convolve_2tap_2x2_sse2(const int16_t *const src,
__m128i s_32[2],
const __m128i coeffs[1]) { … }
// Half-pixel special case of the 2-tap vertical xy pass for a 2x2 tile. It
// takes no coefficients, so presumably it just adds adjacent rows (equal
// weights). Body not visible — confirm.
static inline __m128i xy_y_convolve_2tap_2x2_half_pel_sse2(
const int16_t *const src, __m128i s_32[2]) { … }
// Vertical pass of the 2-D path, 2-tap, 4x2 tile: reads int16_t intermediates
// from src and writes sums to r[0..1]. s_64[] is a caller-owned row cache
// presumably refreshed here. Body not visible — confirm.
static inline void xy_y_convolve_2tap_4x2_sse2(const int16_t *const src,
__m128i s_64[2],
const __m128i coeffs[1],
__m128i r[2]) { … }
// Half-pixel special case of the 2-tap vertical xy pass for a 4x2 tile. It
// takes no coefficients, so presumably it just adds adjacent rows (equal
// weights). Body not visible — confirm.
static inline __m128i xy_y_convolve_2tap_4x2_half_pel_sse2(
const int16_t *const src, __m128i s_64[2]) { … }
// Register-level 2-tap vertical kernel: combines two already-loaded 16-element
// rows s0 and s1 with coeffs[0] and writes the sums to r[0..1]. Presumably
// interleaves s0/s1 into low/high halves before multiply-accumulate. Body not
// visible — confirm.
static inline void xy_y_convolve_2tap_16_avx2(const __m256i s0,
const __m256i s1,
const __m256i coeffs[1],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 2-tap, 8x2 tile: reads int16_t intermediates
// from src and writes sums to r[0..1]. s_128[] is a caller-owned row cache
// presumably refreshed here. Body not visible — confirm.
static inline void xy_y_convolve_2tap_8x2_avx2(const int16_t *const src,
__m128i s_128[2],
const __m256i coeffs[1],
__m256i r[2]) { … }
// Half-pixel special case of the 2-tap vertical xy pass for an 8x2 tile. It
// takes no coefficients, so presumably it just adds adjacent rows (equal
// weights). Body not visible — confirm.
static inline __m256i xy_y_convolve_2tap_8x2_half_pel_avx2(
const int16_t *const src, __m128i s_128[2]) { … }
// Half-pixel special case of the 2-tap vertical xy pass for a 16x2 tile;
// results go to r[0..1]. It takes no coefficients, so presumably adjacent rows
// are simply added. Body not visible — confirm.
static inline void xy_y_convolve_2tap_16x2_half_pel_avx2(
const int16_t *const src, __m256i s_256[2], __m256i r[2]) { … }
// Stores a 16x2 pixel tile from r[0..1] to dst / dst + stride. Presumably
// packs r[0] and r[1] to 8-bit without further rounding (contrast with the
// *_round_store_* helpers). Body not visible — confirm.
static inline void xy_y_store_16x2_avx2(const __m256i r[2], uint8_t *const dst,
const ptrdiff_t stride) { … }
// Vertical pass of the 2-D path, 2-tap, 16x2 tile: reads int16_t intermediates
// from src and writes sums to r[0..3]. s[] is a caller-owned row cache
// presumably refreshed here. Body not visible — confirm.
static inline void xy_y_convolve_2tap_16x2_avx2(const int16_t *const src,
__m256i s[2],
const __m256i coeffs[1],
__m256i r[4]) { … }
// Vertical pass of the 2-D path, 2-tap, one row of 32: s0[0..1] is the
// previous row (read-only), s1[0..1] is writable and presumably receives the
// row loaded from src; sums go to r[0..3]. Body not visible — confirm.
static inline void xy_y_convolve_2tap_32_avx2(const int16_t *const src,
const __m256i s0[2],
__m256i s1[2],
const __m256i coeffs[1],
__m256i r[4]) { … }
// "All-in-one" form of the 32-wide 2-tap vertical xy pass: same inputs as
// xy_y_convolve_2tap_32_avx2 but writes 8-bit pixels to dst rather than
// returning sums. Presumably convolve + round + pack + store. Body not visible
// — confirm.
static inline void xy_y_convolve_2tap_32_all_avx2(const int16_t *const src,
const __m256i s0[2],
__m256i s1[2],
const __m256i coeffs[1],
uint8_t *const dst) { … }
// Half-pixel special case of the 32-wide 2-tap vertical xy pass: no
// coefficients; s0[] is the previous row, s1[] is writable (presumably the
// newly loaded row); results go to r[0..1]. Body not visible — confirm.
static inline void xy_y_convolve_2tap_half_pel_32_avx2(const int16_t *const src,
const __m256i s0[2],
__m256i s1[2],
__m256i r[2]) { … }
// "All-in-one" half-pixel form of the 32-wide 2-tap vertical xy pass: writes
// 8-bit pixels to dst. Presumably add rows + half-pel round + pack + store.
// Body not visible — confirm.
static inline void xy_y_convolve_2tap_half_pel_32_all_avx2(
const int16_t *const src, const __m256i s0[2], __m256i s1[2],
uint8_t *const dst) { … }
// Vertical pass of the 2-D path, 4-tap, 2x2 tile: reads int16_t intermediates
// from src. s_32[] (row cache) and ss_128[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m128i xy_y_convolve_4tap_2x2_sse2(const int16_t *const src,
__m128i s_32[4],
__m128i ss_128[2],
const __m128i coeffs[2]) { … }
// Vertical pass of the 2-D path, 4-tap, 4x2 tile: reads int16_t intermediates
// from src. s_64[] (row cache) and ss_256[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m256i xy_y_convolve_4tap_4x2_avx2(const int16_t *const src,
__m128i s_64[4],
__m256i ss_256[2],
const __m256i coeffs[2]) { … }
// Register-level 4-tap vertical kernel for 16 columns: reads pre-interleaved
// vectors through `ss` and writes sums to r[0..1]. `ss` is an unsized pointer,
// so the number of entries read is only known from the (not visible) body —
// confirm before changing callers.
static inline void xy_y_convolve_4tap_16_avx2(const __m256i *const ss,
const __m256i coeffs[2],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 4-tap, 8x2 tile: reads int16_t intermediates
// from src, updates the caller-owned interleaved state ss_256[] and writes
// sums to r[0..1]. Body not visible — confirm which ss_256 entries are
// refreshed.
static inline void xy_y_convolve_4tap_8x2_avx2(const int16_t *const src,
__m256i ss_256[4],
const __m256i coeffs[2],
__m256i r[2]) { … }
// Half-pixel special case of the 4-tap vertical xy pass for an 8x2 tile. It
// takes a single coefficient vector (coeffs[1]); presumably it exploits filter
// symmetry by summing mirrored rows first. Body not visible — confirm.
static inline void xy_y_convolve_4tap_8x2_half_pel_avx2(
const int16_t *const src, const __m256i coeffs[1], __m256i s_256[4],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 4-tap, 16x2 tile: reads int16_t intermediates
// from src and writes sums to r[0..3]. s_256[], ss_256[] and tt_256[] are
// caller-owned row / interleave state updated here. Body not visible —
// confirm.
static inline void xy_y_convolve_4tap_16x2_avx2(
const int16_t *const src, __m256i s_256[4], __m256i ss_256[4],
__m256i tt_256[4], const __m256i coeffs[2], __m256i r[4]) { … }
// Vertical pass of the 2-D path, 4-tap, named 32x2: unlike the 16x2 sibling it
// takes an explicit `stride` for the int16_t intermediate buffer. Results go
// to r[0..3]. Body not visible — confirm how many columns one call covers
// given that r[] holds four vectors.
static inline void xy_y_convolve_4tap_32x2_avx2(
const int16_t *const src, const ptrdiff_t stride, __m256i s_256[4],
__m256i ss_256[4], __m256i tt_256[4], const __m256i coeffs[2],
__m256i r[4]) { … }
// Half-pixel special case of the 4-tap vertical xy pass for a 16x2 tile;
// results go to r[0..3]. Body not visible — confirm.
// NOTE(review): the name is missing an underscore ("half_pelavx2"); every
// sibling is spelled "..._half_pel_avx2". Renaming needs a coordinated change
// at all call sites, so it is only flagged here.
// NOTE(review): s_256 is declared with 5 entries, whereas the 8x2 half-pel
// 4-tap sibling declares 4 — verify against the body's indexing.
static inline void xy_y_convolve_4tap_16x2_half_pelavx2(
const int16_t *const src, __m256i s_256[5], const __m256i coeffs[1],
__m256i r[4]) { … }
// Vertical pass of the 2-D path, 6-tap, 2x2 tile: reads int16_t intermediates
// from src. s_32[] (row cache) and ss_128[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m128i xy_y_convolve_6tap_2x2_sse2(const int16_t *const src,
__m128i s_32[6],
__m128i ss_128[3],
const __m128i coeffs[3]) { … }
// Vertical pass of the 2-D path, 6-tap, 4x2 tile: reads int16_t intermediates
// from src. s_64[] (row cache) and ss_256[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m256i xy_y_convolve_6tap_4x2_avx2(const int16_t *const src,
__m128i s_64[6],
__m256i ss_256[3],
const __m256i coeffs[3]) { … }
// Register-level 6-tap vertical kernel for 16 columns: reads pre-interleaved
// vectors ss[0..5] and writes sums to r[0..1]. Presumably three entries feed
// each output half. Body not visible — confirm.
static inline void xy_y_convolve_6tap_16_avx2(const __m256i ss[6],
const __m256i coeffs[3],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 6-tap, 8x2 tile: reads int16_t intermediates
// from src, updates the caller-owned interleaved state ss_256[] and writes
// sums to r[0..1]. Body not visible — confirm which ss_256 entries are
// refreshed.
static inline void xy_y_convolve_6tap_8x2_avx2(const int16_t *const src,
__m256i ss_256[6],
const __m256i coeffs[3],
__m256i r[2]) { … }
// Half-pixel special case of the 6-tap vertical xy pass for an 8x2 tile. It
// takes two coefficient vectors (coeffs[2]); presumably it exploits filter
// symmetry by summing mirrored rows first. Body not visible — confirm.
static inline void xy_y_convolve_6tap_8x2_half_pel_avx2(
const int16_t *const src, const __m256i coeffs[2], __m256i s_256[6],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 6-tap, 16x2 tile: reads int16_t intermediates
// from src (rows `stride` apart) and writes sums to r[0..3]. s_256[], ss_256[]
// and tt_256[] are caller-owned row / interleave state updated here. Body not
// visible — confirm.
static inline void xy_y_convolve_6tap_16x2_avx2(
const int16_t *const src, const ptrdiff_t stride, __m256i s_256[6],
__m256i ss_256[6], __m256i tt_256[6], const __m256i coeffs[3],
__m256i r[4]) { … }
// Half-pixel special case of the 6-tap vertical xy pass for a 16x2 tile;
// results go to r[0..3]. It takes two coefficient vectors and a four-entry
// ss_256[] scratch/state array. Presumably mirrored rows are summed before
// filtering. Body not visible — confirm.
static inline void xy_y_convolve_6tap_16x2_half_pel_avx2(
const int16_t *const src, const ptrdiff_t stride, __m256i s_256[6],
__m256i ss_256[4], const __m256i coeffs[2], __m256i r[4]) { … }
// Vertical pass of the 2-D path, 8-tap, 2x2 tile: reads int16_t intermediates
// from src. s_32[] (row cache) and ss_128[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m128i xy_y_convolve_8tap_2x2_sse2(const int16_t *const src,
__m128i s_32[8],
__m128i ss_128[4],
const __m128i coeffs[4]) { … }
// Vertical pass of the 2-D path, 8-tap, 4x2 tile: reads int16_t intermediates
// from src. s_64[] (row cache) and ss_256[] (presumably interleaved row pairs)
// are caller-owned state updated here. Body not visible — confirm.
static inline __m256i xy_y_convolve_8tap_4x2_avx2(const int16_t *const src,
__m128i s_64[8],
__m256i ss_256[4],
const __m256i coeffs[4]) { … }
// Register-level 8-tap vertical kernel for 16 columns: reads pre-interleaved
// vectors through `ss` and writes sums to r[0..1]. `ss` is an unsized pointer,
// so the number of entries read is only known from the (not visible) body —
// confirm before changing callers.
static inline void xy_y_convolve_8tap_16_avx2(const __m256i *const ss,
const __m256i coeffs[4],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 8-tap, 8x2 tile: reads int16_t intermediates
// from src, updates the caller-owned interleaved state ss_256[] and writes
// sums to r[0..1]. Body not visible — confirm which ss_256 entries are
// refreshed.
static inline void xy_y_convolve_8tap_8x2_avx2(const int16_t *const src,
__m256i ss_256[8],
const __m256i coeffs[4],
__m256i r[2]) { … }
// Half-pixel special case of the 8-tap vertical xy pass for an 8x2 tile. It
// takes two coefficient vectors (coeffs[2]); presumably it exploits filter
// symmetry by summing mirrored rows first. Body not visible — confirm.
static inline void xy_y_convolve_8tap_8x2_half_pel_avx2(
const int16_t *const src, const __m256i coeffs[2], __m256i s_256[8],
__m256i r[2]) { … }
// Vertical pass of the 2-D path, 8-tap, 16x2 tile: reads int16_t intermediates
// from src (rows `stride` apart) and writes sums to r[0..3]. s_256[], ss_256[]
// and tt_256[] are caller-owned row / interleave state updated here. Declared
// AOM_FORCE_INLINE. Body not visible — confirm.
static AOM_FORCE_INLINE void xy_y_convolve_8tap_16x2_avx2(
const int16_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
__m256i s_256[8], __m256i ss_256[8], __m256i tt_256[8], __m256i r[4]) { … }
// Half-pixel special case of the 8-tap vertical xy pass for a 16x2 tile;
// results go to r[0..3]. Body not visible — confirm.
// NOTE(review): coeffs is declared with 4 entries here, but the 8x2 half-pel
// 8-tap sibling declares 2; check how many entries the body actually reads.
static inline void xy_y_convolve_8tap_16x2_half_pel_avx2(
const int16_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
__m256i s_256[8], __m256i r[4]) { … }
// Presumably applies the xy vertical-pass rounding to res[0..1], packs to
// 8-bit and stores an 8x2 pixel tile at dst / dst + stride. Body not visible —
// confirm.
static inline void xy_y_round_store_8x2_avx2(const __m256i res[2],
uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably applies the xy vertical-pass rounding to res[0..3], packs to
// 8-bit and stores a 16x2 pixel tile at dst / dst + stride. Body not visible —
// confirm how the four vectors map to rows and column halves.
static inline void xy_y_round_store_16x2_avx2(const __m256i res[4],
uint8_t *const dst,
const ptrdiff_t stride) { … }
// Presumably applies the single-reference vertical rounding to res[0..1],
// packs to 8-bit and stores one row of 32 pixels at dst. Body not visible —
// confirm.
static inline void sr_y_round_store_32_avx2(const __m256i res[2],
uint8_t *const dst) { … }
// Presumably applies the single-reference vertical rounding to res[0..3],
// packs to 8-bit and stores two rows of 32 pixels at dst and dst + dst_stride.
// Body not visible — confirm.
// NOTE(review): dst_stride is `int32_t` here, whereas the 8x2 / 16x2
// sr_y_round_store_* siblings take `ptrdiff_t`; consider unifying.
static inline void sr_y_round_store_32x2_avx2(const __m256i res[4],
uint8_t *const dst,
const int32_t dst_stride) { … }
// Single-reference vertical 2-tap step for one row of 32 pixels: takes the
// previous row s0, writes through s1 (presumably the row loaded from src) and
// stores 8-bit output to dst. Presumably convolve + round + pack + store. Body
// not visible — confirm.
static inline void sr_y_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1], const __m256i s0,
__m256i *const s1, uint8_t *const dst) { … }
// Entry point for the single-reference vertical convolution: filters a w x h
// block from src (row pitch src_stride) into dst (row pitch dst_stride) using
// filter_params_y at sub-pixel phase subpel_y_q4.
// Presumably dispatches on the filter tap count and on w to the size-specific
// y_convolve_* / sr_y_* helpers declared earlier in this header. The body is
// not visible here: confirm the supported (w, h) combinations, whether h must
// be even, and how far above/below the block src is read.
static AOM_FORCE_INLINE void av1_convolve_y_sr_specialized_avx2(
const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
int32_t w, int32_t h, const InterpFilterParams *filter_params_y,
const int32_t subpel_y_q4) { … }
// Single-reference horizontal 2-tap step for one row of 32 pixels: reads from
// src and stores 8-bit output to dst. Presumably convolve + round + pack +
// store. Body not visible — confirm.
static inline void sr_x_2tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[1],
uint8_t *const dst) { … }
// Single-reference horizontal 6-tap step for one row of 32 pixels: reads from
// src and stores 8-bit output to dst. Presumably convolve + round + pack +
// store. Body not visible — confirm.
static inline void sr_x_6tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[3],
const __m256i filt[3],
uint8_t *const dst) { … }
// Single-reference horizontal 8-tap step for one row of 32 pixels: reads from
// src and stores 8-bit output to dst. Declared AOM_FORCE_INLINE. Presumably
// convolve + round + pack + store. Body not visible — confirm.
static AOM_FORCE_INLINE void sr_x_8tap_32_avx2(const uint8_t *const src,
const __m256i coeffs[4],
const __m256i filt[4],
uint8_t *const dst) { … }
// Entry point for the single-reference horizontal convolution: filters a w x h
// block from src (row pitch src_stride) into dst (row pitch dst_stride) using
// filter_params_x at sub-pixel phase subpel_x_q4.
// Presumably dispatches on the filter tap count and on w to the size-specific
// x_convolve_* / sr_x_* helpers declared earlier in this header. The body is
// not visible here: confirm the supported (w, h) combinations and how
// conv_params is used (it is non-const, so it may be read or written).
static AOM_FORCE_INLINE void av1_convolve_x_sr_specialized_avx2(
const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
int32_t w, int32_t h, const InterpFilterParams *filter_params_x,
const int32_t subpel_x_q4, ConvolveParams *conv_params) { … }
#endif