#include <immintrin.h>
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/x86/convolve.h"
#include "aom_dsp/x86/convolve_avx2.h"
#include "aom_dsp/x86/synonyms_avx2.h"
#include "aom_ports/mem.h"
#if defined(__clang__)
#if (__clang_major__ > 0 && __clang_major__ < 3) || \
(__clang_major__ == 3 && __clang_minor__ <= 3) || \
(defined(__APPLE__) && defined(__apple_build_version__) && \
((__clang_major__ == 4 && __clang_minor__ <= 2) || \
(__clang_major__ == 5 && __clang_minor__ == 0)))
#define MM256_BROADCASTSI128_SI256 …
#else
#define MM256_BROADCASTSI128_SI256(x) …
#endif
#elif defined(__GNUC__)
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6)
#define MM256_BROADCASTSI128_SI256 …
#elif __GNUC__ == 4 && __GNUC_MINOR__ == 7
#define MM256_BROADCASTSI128_SI256 …
#else
#define MM256_BROADCASTSI128_SI256 …
#endif
#else
#define MM256_BROADCASTSI128_SI256 …
#endif
static inline void xx_storeu2_epi32(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) { … }
static inline __m256i xx_loadu2_epi64(const void *hi, const void *lo) { … }
static inline void xx_storeu2_epi64(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) { … }
static inline void xx_store2_mi128(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) { … }
static void aom_filter_block1d4_h4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d4_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d8_h4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d8_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d16_h4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d16_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d8_v4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d8_v8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d16_v4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d16_v8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
static void aom_filter_block1d4_v4_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
#if HAVE_AVX2 && HAVE_SSSE3
filter8_1dfunction aom_filter_block1d4_v8_ssse3;
filter8_1dfunction aom_filter_block1d16_v2_ssse3;
filter8_1dfunction aom_filter_block1d16_h2_ssse3;
filter8_1dfunction aom_filter_block1d8_v2_ssse3;
filter8_1dfunction aom_filter_block1d8_h2_ssse3;
filter8_1dfunction aom_filter_block1d4_v2_ssse3;
filter8_1dfunction aom_filter_block1d4_h2_ssse3;
#define aom_filter_block1d4_v8_avx2 …
#define aom_filter_block1d16_v2_avx2 …
#define aom_filter_block1d16_h2_avx2 …
#define aom_filter_block1d8_v2_avx2 …
#define aom_filter_block1d8_h2_avx2 …
#define aom_filter_block1d4_v2_avx2 …
#define aom_filter_block1d4_h2_avx2 …
FUN_CONV_1D(…)
FUN_CONV_1D(…)
#endif