#include <immintrin.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/convolve.h"
#include "vpx_dsp/x86/convolve_avx2.h"
// Public high-bitdepth "convolve copy" entry point for the AVX2 build.
// Signature: const 16-bit source + stride, 16-bit destination + stride, an
// InterpKernel pointer, x/y start and step values (q4), block width `w`,
// height `h`, and bit depth `bd`.
// NOTE(review): body is elided in this view. Presumably a plain w x h block
// copy that ignores the filter/step/bd arguments — confirm against the body.
void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
const InterpKernel *filter, int x0_q4,
int x_step_q4, int y0_q4, int y_step_q4,
int w, int h, int bd) { … }
// Public high-bitdepth "convolve avg" entry point for the AVX2 build.
// Takes the same parameter list as vpx_highbd_convolve_copy_avx2.
// NOTE(review): body is elided in this view. Presumably averages the w x h
// source block into the existing contents of `dst` — confirm, including the
// rounding behavior, against the body.
void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
const InterpKernel *filter, int x0_q4,
int x_step_q4, int y0_q4, int y_step_q4,
int w, int h, int bd) { … }
// File-local constant tables: three 32-byte tables and one table of eight
// 32-bit values. Initializers are elided in this view.
// NOTE(review): by name these look like byte-shuffle patterns and a dword
// permute index used by the pixel-packing helpers — confirm against their use.
static const uint8_t signal_pattern_0[32] = …;
static const uint8_t signal_pattern_1[32] = …;
static const uint8_t signal_pattern_2[32] = …;
static const uint32_t signal_index[8] = …;
// Macro values are elided in this view.
// NOTE(review): presumably the shift amount applied after the filter sum and
// the matching rounding offset, i.e. NUM == 1 << (BITS - 1) — confirm.
#define CONV8_ROUNDING_BITS …
#define CONV8_ROUNDING_NUM …
// File-local helper: reads 256-bit vector(s) via `s`, writes result(s) via `p`.
// NOTE(review): body elided; presumably rearranges source pixels into the
// layout the 8-tap filter expects (likely using the signal_* tables) — confirm
// how many elements of `s` and `p` are accessed.
static INLINE void pack_pixels(const __m256i *s, __m256i *p ) { … }
// File-local helper: takes two const 256-bit vector inputs (`s0`, `s1`) and
// writes through `x`.
// NOTE(review): body elided; the name suggests packing for a 16-pixel-wide
// row — confirm, including how many vectors `x` must hold.
static INLINE void pack_16_pixels(const __m256i *s0, const __m256i *s1,
__m256i *x ) { … }
// File-local helper: takes a const 16-bit source pointer and writes 256-bit
// vector(s) through `x`.
// NOTE(review): body elided; the name suggests one row of 8 output pixels —
// confirm how many source samples are read past `src`.
static INLINE void pack_8x1_pixels(const uint16_t *src, __m256i *x) { … }
// File-local helper: takes a const 16-bit source pointer plus a stride and
// writes 256-bit vector(s) through `x`.
// NOTE(review): body elided; the name suggests two rows of 8 output pixels,
// with `stride` separating the rows — confirm.
static INLINE void pack_8x2_pixels(const uint16_t *src, ptrdiff_t stride,
__m256i *x) { … }
// File-local helper: takes a const 16-bit source pointer and writes 256-bit
// vector(s) through `x`.
// NOTE(review): body elided; the name suggests one row of 16 output pixels —
// confirm.
static INLINE void pack_16x1_pixels(const uint16_t *src, __m256i *x) { … }
// File-local helper: takes a const pointer to int16 filter coefficients and
// writes 256-bit vector(s) through `f`.
// NOTE(review): body elided; presumably broadcasts tap pairs of an 8-tap
// kernel into vector registers — confirm the number of vectors written.
static INLINE void pack_filters(const int16_t *filter, __m256i *f ) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and const
// filter vectors (`fil`), and writes a 256-bit result through `y`.
// NOTE(review): body elided; presumably multiply-accumulates the taps and
// applies the CONV8 rounding/shift for 8 outputs — confirm.
static INLINE void filter_8x1_pixels(const __m256i *sig ,
const __m256i *fil ,
__m256i *y) { … }
// File-local helper: takes a const result vector `y`, a const `mask` vector,
// and a 16-bit destination pointer.
// NOTE(review): body elided; presumably narrows/clamps `y` against `mask`
// (likely the max pixel value for the bit depth) and stores 8 pixels — confirm.
static INLINE void store_8x1_pixels(const __m256i *y, const __m256i *mask,
uint16_t *dst) { … }
// File-local helper: takes two const result vectors (`y0`, `y1`), a const
// `mask` vector, a 16-bit destination pointer, and a destination pitch.
// NOTE(review): body elided; the name suggests storing two rows of 8 pixels
// separated by `pitch` — confirm.
static INLINE void store_8x2_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst,
ptrdiff_t pitch) { … }
// File-local helper: takes two const result vectors (`y0`, `y1`), a const
// `mask` vector, and a 16-bit destination pointer.
// NOTE(review): body elided; the name suggests storing one row of 16 pixels —
// confirm.
static INLINE void store_16x1_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, horizontal, 8-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d8_h8_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, horizontal, 8-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_h8_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const pointer to int16 filter coefficients and
// writes a 256-bit vector through `f`.
// NOTE(review): body elided; "2t" presumably means the 2-tap (bilinear) case,
// packing the two active taps — confirm which coefficient indices are read.
static INLINE void pack_2t_filter(const int16_t *filter, __m256i *f) { … }
// File-local helper: takes two const 256-bit vector inputs (`s0`, `s1`) and
// writes through `sig`.
// NOTE(review): body elided; presumably interleaves neighboring pixels for a
// 16-wide 2-tap filter — confirm how many vectors `sig` must hold.
static INLINE void pack_16_2t_pixels(const __m256i *s0, const __m256i *s1,
__m256i *sig) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; the name suggests loading two rows of 8 pixels
// for the 2-tap horizontal path — confirm.
static INLINE void pack_8x2_2t_pixels(const uint16_t *src,
const ptrdiff_t pitch, __m256i *sig) { … }
// File-local helper: takes a const 16-bit source pointer and writes 256-bit
// vector(s) through `sig`.
// NOTE(review): body elided; the name suggests one row of 16 pixels for the
// 2-tap horizontal path — confirm.
static INLINE void pack_16x1_2t_pixels(const uint16_t *src,
__m256i *sig ) { … }
// File-local helper: takes a const 16-bit source pointer and writes 256-bit
// vector(s) through `sig`.
// NOTE(review): body elided; the name suggests one row of 8 pixels for the
// 2-tap horizontal path — confirm.
static INLINE void pack_8x1_2t_pixels(const uint16_t *src,
__m256i *sig ) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and a const
// filter vector (`f`), and writes two 256-bit results through `y0` and `y1`.
// NOTE(review): body elided; presumably applies the 2-tap filter with
// rounding/shift to 16 pixels split across the two outputs — confirm.
static INLINE void filter_16_2t_pixels(const __m256i *sig, const __m256i *f,
__m256i *y0, __m256i *y1) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and a const
// filter vector (`f`), and writes one 256-bit result through `y0`.
// NOTE(review): body elided; presumably the 8-pixel, single-row counterpart of
// filter_16_2t_pixels — confirm.
static INLINE void filter_8x1_2t_pixels(const __m256i *sig, const __m256i *f,
__m256i *y0) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, horizontal, 2-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d8_h2_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, horizontal, 2-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_h2_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; presumably primes the row window for the 8-wide
// vertical 8-tap filter before the main loop — confirm how many rows are read
// and how large `sig` must be.
// NOTE(review): declared without INLINE, unlike most sibling helpers.
static void pack_8x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; presumably loads the newest rows into the window
// set up by pack_8x9_init on each loop iteration — confirm.
static INLINE void pack_8x9_pixels(const uint16_t *src, ptrdiff_t pitch,
__m256i *sig) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and const
// filter vectors (`f`), and writes two 256-bit results through `y0` and `y1`.
// NOTE(review): body elided; presumably produces two filtered output rows of
// 8 pixels from a 9-row window — confirm.
static INLINE void filter_8x9_pixels(const __m256i *sig, const __m256i *f,
__m256i *y0, __m256i *y1) { … }
// File-local helper that modifies the vector array `sig` in place.
// NOTE(review): body elided; presumably slides the row window forward for the
// next iteration of the 8-wide vertical filter — confirm which elements move.
static INLINE void update_pixels(__m256i *sig) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, vertical, 8-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d8_v8_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; presumably the 16-wide counterpart of
// pack_8x9_init — confirm the required size of `sig`.
// NOTE(review): declared without INLINE, unlike most sibling helpers.
static void pack_16x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; presumably the 16-wide counterpart of
// pack_8x9_pixels — confirm.
// NOTE(review): declared without INLINE, whereas pack_8x9_pixels has it.
static void pack_16x9_pixels(const uint16_t *src, ptrdiff_t pitch,
__m256i *sig) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and const
// filter vectors (`f`), and writes through `y0` and `y1`.
// NOTE(review): body elided; presumably the 16-wide counterpart of
// filter_8x9_pixels — confirm whether `y0`/`y1` are single vectors or arrays.
static INLINE void filter_16x9_pixels(const __m256i *sig, const __m256i *f,
__m256i *y0, __m256i *y1) { … }
// File-local helper: takes const result vectors via `y0` and `y1`, a const
// `mask` vector, a 16-bit destination pointer, and a destination pitch.
// NOTE(review): body elided; the name suggests storing two rows of 16 pixels
// separated by `pitch` — confirm whether `y0`/`y1` each point to more than one
// vector.
static INLINE void store_16x2_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst,
ptrdiff_t pitch) { … }
// File-local helper that modifies the vector array `sig` in place.
// NOTE(review): body elided; presumably the 16-wide counterpart of
// update_pixels — confirm.
// NOTE(review): declared without INLINE, whereas update_pixels has it.
static void update_16x9_pixels(__m256i *sig) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, vertical, 8-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_v8_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const 16-bit source pointer and writes 256-bit
// vector(s) through `sig`. Unlike the 8x9/16x9 init helpers it takes no pitch.
// NOTE(review): body elided; presumably loads the first 16-pixel row for the
// vertical 2-tap path — confirm.
// NOTE(review): declared without INLINE, unlike most sibling helpers.
static void pack_16x2_init(const uint16_t *src, __m256i *sig) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 256-bit vector(s) through `sig`.
// NOTE(review): body elided; presumably loads the next row and pairs it with
// the previous one for the vertical 2-tap filter — confirm.
static INLINE void pack_16x2_2t_pixels(const uint16_t *src, ptrdiff_t pitch,
__m256i *sig) { … }
// File-local helper: takes const packed-signal vectors (`sig`) and a const
// filter vector (`f`), and writes two 256-bit results through `y0` and `y1`.
// NOTE(review): body elided; presumably applies the vertical 2-tap filter to
// one 16-pixel row — confirm.
static INLINE void filter_16x2_2t_pixels(const __m256i *sig, const __m256i *f,
__m256i *y0, __m256i *y1) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, vertical, 2-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_v2_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const pointer to int16 filter coefficients and
// writes a 128-bit vector through `f` (note: __m128i, not __m256i).
// NOTE(review): body elided; presumably the 128-bit counterpart of
// pack_2t_filter for the 8-wide vertical 2-tap path — confirm.
static INLINE void pack_8x1_2t_filter(const int16_t *filter, __m128i *f) { … }
// File-local helper: takes a const 16-bit source pointer and writes 128-bit
// vector(s) through `sig`.
// NOTE(review): body elided; presumably loads the first 8-pixel row for the
// vertical 2-tap path — confirm.
// NOTE(review): declared without INLINE, unlike most sibling helpers.
static void pack_8x2_init(const uint16_t *src, __m128i *sig) { … }
// File-local helper: takes a const 16-bit source pointer plus a pitch and
// writes 128-bit vector(s) through `sig`.
// NOTE(review): body elided; "_ver" presumably marks the vertical variant,
// distinguishing it from pack_8x2_2t_pixels — confirm.
static INLINE void pack_8x2_2t_pixels_ver(const uint16_t *src, ptrdiff_t pitch,
__m128i *sig) { … }
// File-local helper: takes const 128-bit packed-signal vectors (`sig`) and a
// const 128-bit filter vector (`f`), and writes two 128-bit results through
// `y0` and `y1`.
// NOTE(review): body elided; presumably applies the 2-tap filter to 8 pixels
// split across the two outputs — confirm.
static INLINE void filter_8_2t_pixels(const __m128i *sig, const __m128i *f,
__m128i *y0, __m128i *y1) { … }
// File-local helper: takes two const 128-bit result vectors (`y0`, `y1`), a
// const 128-bit `mask`, and a 16-bit destination pointer.
// NOTE(review): body elided; presumably packs and clamps the two halves and
// stores one row of 8 pixels — confirm.
static INLINE void store_8x1_2t_pixels_ver(const __m128i *y0, const __m128i *y1,
const __m128i *mask, uint16_t *dst) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, vertical, 2-tap variant
// (its helpers above use 128-bit vectors) — confirm against the body.
static void vpx_highbd_filter_block1d8_v2_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local helper: takes a const result vector `y0`, a const `mask` vector,
// and a 16-bit destination pointer.
// NOTE(review): body elided; presumably like store_8x1_pixels but averaging
// the result with the pixels already in `dst` — confirm the rounding used.
static INLINE void store_8x1_avg_pixels(const __m256i *y0, const __m256i *mask,
uint16_t *dst) { … }
// File-local helper: takes two const result vectors (`y0`, `y1`), a const
// `mask` vector, a 16-bit destination pointer, and a destination pitch.
// NOTE(review): body elided; presumably the averaging counterpart of
// store_8x2_pixels — confirm.
static INLINE void store_8x2_avg_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst,
ptrdiff_t pitch) { … }
// File-local helper: takes two const result vectors (`y0`, `y1`), a const
// `mask` vector, and a 16-bit destination pointer.
// NOTE(review): body elided; presumably the averaging counterpart of
// store_16x1_pixels — confirm.
static INLINE void store_16x1_avg_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst) { … }
// File-local helper: takes const result vectors via `y0` and `y1`, a const
// `mask` vector, a 16-bit destination pointer, and a destination pitch.
// NOTE(review): body elided; presumably the averaging counterpart of
// store_16x2_pixels — confirm.
static INLINE void store_16x2_avg_pixels(const __m256i *y0, const __m256i *y1,
const __m256i *mask, uint16_t *dst,
ptrdiff_t pitch) { … }
// File-local helper: takes two const 128-bit result vectors (`y0`, `y1`), a
// const 128-bit `mask`, and a 16-bit destination pointer.
// NOTE(review): body elided; presumably the averaging counterpart of
// store_8x1_2t_pixels_ver — confirm.
static INLINE void store_8x1_2t_avg_pixels_ver(const __m128i *y0,
const __m128i *y1,
const __m128i *mask,
uint16_t *dst) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 8-wide horizontal 8-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d8_h8_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 16-wide horizontal 8-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d16_h8_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 4-pixel-wide, horizontal, 4-tap variant —
// confirm against the body.
// NOTE(review): the 4-tap kernels name their parameters src_stride/dst_stride/
// kernel, while the 8-tap and 2-tap kernels use src_pitch/dst_pitch/filter.
static void vpx_highbd_filter_block1d4_h4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, horizontal, 4-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d8_h4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, horizontal, 4-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_h4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 8-wide vertical 8-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d8_v8_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 16-wide vertical 8-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d16_v8_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 8-wide horizontal 2-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d8_h2_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 16-wide horizontal 2-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d16_h2_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 16-wide vertical 2-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d16_v2_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + pitch, 16-bit destination + pitch,
// `height`, pointer to int16 filter coefficients, and bit depth `bd`.
// NOTE(review): the name suggests the 8-wide vertical 2-tap filter with the
// result averaged into the existing destination — confirm against the body.
static void vpx_highbd_filter_block1d8_v2_avg_avx2(
const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 4-pixel-wide, vertical, 4-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d4_v4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests an 8-pixel-wide, vertical, 4-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d8_v4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// File-local 1-D filter kernel; body elided in this view.
// Signature: const 16-bit source + stride, 16-bit destination + stride,
// `height`, pointer to int16 kernel coefficients, and bit depth `bd`.
// NOTE(review): the name suggests a 16-pixel-wide, vertical, 4-tap variant —
// confirm against the body.
static void vpx_highbd_filter_block1d16_v4_avx2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { … }
// Declarations of four 4-wide SSE2 kernels (8-tap and 2-tap, h and v by name).
// They are declared here with type highbd_filter8_1dfunction and are not
// defined in the visible portion of this file.
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
// Macro definitions for the matching 4-wide *_avx2 names; expansions are
// elided in this view.
// NOTE(review): presumably each aliases the SSE2 symbol declared above, since
// no AVX2 4-wide 8-tap/2-tap kernel is defined in the visible file — confirm.
#define vpx_highbd_filter_block1d4_h8_avx2 …
#define vpx_highbd_filter_block1d4_h2_avx2 …
#define vpx_highbd_filter_block1d4_v8_avx2 …
#define vpx_highbd_filter_block1d4_v2_avx2 …
// Macro definitions for the 4-tap *_avg_avx2 names (16-, 8- and 4-wide, v and
// h); expansions are elided in this view.
// NOTE(review): no 4-tap averaging kernel is defined in the visible file, so
// these presumably map to a fallback kernel (or a null kernel) for use by the
// HIGH_FUN_CONV_* instantiations — confirm what they expand to.
#define vpx_highbd_filter_block1d16_v4_avg_avx2 …
#define vpx_highbd_filter_block1d16_h4_avg_avx2 …
#define vpx_highbd_filter_block1d8_v4_avg_avx2 …
#define vpx_highbd_filter_block1d8_h4_avg_avx2 …
#define vpx_highbd_filter_block1d4_v4_avg_avx2 …
#define vpx_highbd_filter_block1d4_h4_avg_avx2 …
// Two HIGH_FUN_CONV_1D instantiations and one HIGH_FUN_CONV_2D instantiation;
// arguments are elided in this view.
// NOTE(review): these macros are not defined in the visible file (presumably
// they come from vpx_dsp/x86/convolve.h) and presumably generate the public
// non-averaging convolve entry points from the block1d kernels — confirm.
HIGH_FUN_CONV_1D(…)
HIGH_FUN_CONV_1D(…)
HIGH_FUN_CONV_2D(…)
// Declarations of four 4-wide SSE2 averaging kernels (8-tap and 2-tap, h and v
// by name). They are declared here with type highbd_filter8_1dfunction and are
// not defined in the visible portion of this file.
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;
// Macro definitions for the matching 4-wide *_avg_avx2 names; expansions are
// elided in this view.
// NOTE(review): presumably each aliases the SSE2 averaging symbol declared
// above — confirm.
#define vpx_highbd_filter_block1d4_h8_avg_avx2 …
#define vpx_highbd_filter_block1d4_h2_avg_avx2 …
#define vpx_highbd_filter_block1d4_v8_avg_avx2 …
#define vpx_highbd_filter_block1d4_v2_avg_avx2 …
// Two HIGH_FUN_CONV_1D instantiations and one HIGH_FUN_CONV_2D instantiation;
// arguments are elided in this view.
// NOTE(review): these follow the *_avg_* declarations and aliases, so they
// presumably generate the averaging convolve entry points — confirm.
HIGH_FUN_CONV_1D(…)
HIGH_FUN_CONV_1D(…)
HIGH_FUN_CONV_2D(…)
// NOTE(review): HIGHBD_FUNC is not defined anywhere in the visible portion of
// this file. If no included header defines it either, this #undef has no
// effect (undefining a name that is not a macro is ignored) and may be a
// leftover — confirm before removing.
#undef HIGHBD_FUNC