#include <tmmintrin.h>
#include <string.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_dsp/x86/convolve.h"
#include "vpx_dsp/x86/convolve_sse2.h"
#include "vpx_dsp/x86/convolve_ssse3.h"
#include "vpx_dsp/x86/mem_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
// File-local inline helper. Takes a pointer to const __m128i values (`s`) and
// a pointer to const int16_t filter coefficients (`filter`) and returns a
// single __m128i.
// NOTE(review): purpose inferred from the name only -- presumably an 8-tap
// convolution producing 8 results from pre-shuffled source registers; confirm
// against the body.
static INLINE __m128i shuffle_filter_convolve8_8_ssse3(
const __m128i *const s, const int16_t *const filter) { … }
// Build-dependent naming of three filter8_1dfunction entry points.
// When VPX_ARCH_X86_64 evaluates non-zero, the *_intrin_ssse3 functions are
// declared and the plain *_ssse3 names become macros. Otherwise the plain
// *_ssse3 names are declared directly as filter8_1dfunction.
// NOTE(review): presumably the macros forward the *_ssse3 names to the
// *_intrin_ssse3 functions, and on non-x86-64 builds the *_ssse3 symbols are
// provided by another translation unit -- confirm.
#if VPX_ARCH_X86_64
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
#define vpx_filter_block1d4_h8_ssse3 …
#define vpx_filter_block1d8_h8_ssse3 …
#define vpx_filter_block1d8_v8_ssse3 …
#else
// No *_intrin_ssse3 declarations in this branch; only the plain names exist.
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
#endif
#if VPX_ARCH_X86_64
// Externally visible function, declared earlier in this file with the
// filter8_1dfunction type and compiled only inside the VPX_ARCH_X86_64 guard.
// Arguments, as named in the signature: source pointer and pitch, output
// pointer and pitch, output height, and a pointer to int16_t filter taps.
// NOTE(review): the name suggests an 8-tap horizontal filter for 4-pixel-wide
// blocks -- confirm against the body.
void vpx_filter_block1d4_h8_intrin_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
// Externally visible function, declared earlier in this file with the
// filter8_1dfunction type and compiled only inside the VPX_ARCH_X86_64 guard.
// Arguments, as named in the signature: source pointer and pitch, output
// pointer and pitch, output height, and a pointer to int16_t filter taps.
// NOTE(review): the name suggests an 8-tap horizontal filter for 8-pixel-wide
// blocks -- confirm against the body.
void vpx_filter_block1d8_h8_intrin_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
// Externally visible function, declared earlier in this file with the
// filter8_1dfunction type and compiled only inside the VPX_ARCH_X86_64 guard.
// Arguments, as named in the signature: source pointer and pitch, output
// pointer and pitch (named out_pitch here, unlike the horizontal siblings),
// output height, and a pointer to int16_t filter taps.
// NOTE(review): the name suggests an 8-tap vertical filter for 8-pixel-wide
// blocks -- confirm against the body.
void vpx_filter_block1d8_v8_intrin_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
#endif
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap horizontal filter for 16-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d16_h4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap vertical filter for 16-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d16_v4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap horizontal filter for 8-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d8_h4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride, uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap vertical filter for 8-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d8_v4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride, uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap horizontal filter for 4-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d4_h4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride, uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// File-local filter routine. Arguments, as named in the signature: source
// pointer and stride, destination pointer and stride, row count (`height`),
// and a pointer to int16_t kernel coefficients.
// NOTE(review): the name suggests a 4-tap vertical filter for 4-pixel-wide
// blocks -- confirm against the body.
static void vpx_filter_block1d4_v4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride, uint8_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height,
const int16_t *kernel) { … }
// Declarations only (type filter8_1dfunction): the remaining h8/v8 routines
// and all h8/v8 *_avg_* routines. The 4-wide h8, 8-wide h8 and 8-wide v8
// non-averaging names are handled by the VPX_ARCH_X86_64 conditional earlier
// in this file.
// NOTE(review): these are presumably 8-tap kernels implemented outside this
// file (e.g. in assembly) -- confirm where the definitions live.
filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3;
// The h4/v4 *_avg_* names are provided as macros rather than as functions.
// NOTE(review): presumably each one aliases the matching h8/v8 *_avg_*
// routine because no dedicated 4-tap averaging kernel exists -- confirm
// against the macro expansions.
#define vpx_filter_block1d16_v4_avg_ssse3 …
#define vpx_filter_block1d16_h4_avg_ssse3 …
#define vpx_filter_block1d8_v4_avg_ssse3 …
#define vpx_filter_block1d8_h4_avg_ssse3 …
#define vpx_filter_block1d4_v4_avg_ssse3 …
#define vpx_filter_block1d4_h4_avg_ssse3 …
// Declarations only (type filter8_1dfunction): h2/v2 routines for the 16, 8
// and 4 block sizes, in plain and *_avg_* flavours.
// NOTE(review): presumably 2-tap (bilinear) kernels implemented outside this
// file -- confirm where the definitions live.
filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3;
// Four file-scope expansions of FUN_CONV_1D, a macro not defined in this file.
// NOTE(review): presumably it comes from vpx_dsp/x86/convolve.h and each
// expansion emits one public 1-D convolve wrapper that dispatches to the
// vpx_filter_block1d* routines named above -- confirm against the macro
// definition and its arguments.
FUN_CONV_1D(…)
FUN_CONV_1D(…)
FUN_CONV_1D(…)
FUN_CONV_1D(…)
// File-local helper. Arguments, as named in the signature: source pointer and
// stride, destination pointer (no destination stride), and a pointer to
// int16_t horizontal filter coefficients.
// NOTE(review): the name suggests it horizontally filters a group of 8 --
// confirm the exact input/output layout against the body.
static void filter_horiz_w8_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const x_filter) { … }
// File-local helper taking a source pointer and stride plus a destination
// pointer and stride.
// NOTE(review): by its name it presumably writes the transpose of an 8x8 byte
// block from `src` into `dst` -- confirm against the body.
static void transpose8x8_to_dst(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// File-local horizontal pass for the scaled convolve path. Arguments, as
// named in the signature: source and destination pointers with their strides,
// a table of InterpKernel filters, a starting position `x0_q4`, a per-step
// increment `x_step_q4`, and the block width and height.
// NOTE(review): the _q4 suffix presumably means 1/16-sample fixed point and
// the w8 suffix presumably means widths handled in groups of 8 -- confirm
// against the body.
static void scaledconvolve_horiz_w8(const uint8_t *src,
const ptrdiff_t src_stride, uint8_t *dst,
const ptrdiff_t dst_stride,
const InterpKernel *const x_filters,
const int x0_q4, const int x_step_q4,
const int w, const int h) { … }
// File-local helper. Arguments, as named in the signature: source pointer and
// stride, destination pointer (no destination stride), and a pointer to
// int16_t filter coefficients.
// NOTE(review): the name suggests it horizontally filters a group of 4 --
// confirm the exact input/output layout against the body.
static void filter_horiz_w4_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const filter) { … }
// File-local helper taking a source pointer and stride plus a destination
// pointer and stride.
// NOTE(review): by its name it presumably writes the transpose of a 4x4 byte
// block from `src` into `dst` -- confirm against the body.
static void transpose4x4_to_dst(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// File-local horizontal pass for the scaled convolve path. Arguments, as
// named in the signature: source and destination pointers with their strides,
// a table of InterpKernel filters, a starting position `x0_q4`, a per-step
// increment `x_step_q4`, and the block width and height.
// NOTE(review): the _q4 suffix presumably means 1/16-sample fixed point and
// the w4 suffix presumably means this variant serves 4-wide blocks -- confirm
// against the body.
static void scaledconvolve_horiz_w4(const uint8_t *src,
const ptrdiff_t src_stride, uint8_t *dst,
const ptrdiff_t dst_stride,
const InterpKernel *const x_filters,
const int x0_q4, const int x_step_q4,
const int w, const int h) { … }
// File-local helper. Takes a pointer to const __m128i values (`s`) and a
// pointer to const int16_t filter coefficients, and returns one __m128i.
// NOTE(review): presumably the shared arithmetic core of the filter_vert_*
// helpers in this file -- confirm against the body and its callers.
static __m128i filter_vert_kernel(const __m128i *const s,
const int16_t *const filter) { … }
// File-local helper. Arguments, as named in the signature: source pointer and
// stride, destination pointer (no destination stride), and a pointer to
// int16_t filter coefficients.
// NOTE(review): the name suggests it vertically filters one 4-pixel-wide
// output row -- confirm against the body.
static void filter_vert_w4_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const int16_t *const filter) { … }
// File-local vertical pass for the scaled convolve path. Arguments, as named
// in the signature: source and destination pointers with their strides, a
// table of InterpKernel filters, a starting position `y0_q4`, a per-step
// increment `y_step_q4`, and the block width and height.
// NOTE(review): the _q4 suffix presumably means 1/16-sample fixed point and
// the w4 suffix presumably means this variant serves 4-wide blocks -- confirm
// against the body.
static void scaledconvolve_vert_w4(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// File-local helper. Arguments, as named in the signature: source pointer and
// stride, destination pointer (no destination stride), and a pointer to
// int16_t filter coefficients.
// NOTE(review): the name suggests it vertically filters one 8-pixel-wide
// output row -- confirm against the body.
static void filter_vert_w8_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const int16_t *const filter) { … }
// File-local vertical pass for the scaled convolve path. Arguments, as named
// in the signature: source and destination pointers with their strides, a
// table of InterpKernel filters, a starting position `y0_q4`, a per-step
// increment `y_step_q4`, and the block width and height.
// NOTE(review): the _q4 suffix presumably means 1/16-sample fixed point and
// the w8 suffix presumably means this variant serves 8-wide blocks -- confirm
// against the body.
static void scaledconvolve_vert_w8(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// File-local helper. Unlike the w4/w8 vertical helpers in this file it also
// takes a width `w`, alongside the source pointer and stride, destination
// pointer, and a pointer to int16_t filter coefficients.
// NOTE(review): presumably it vertically filters one output row in 16-pixel
// chunks across `w` -- confirm against the body.
static void filter_vert_w16_ssse3(const uint8_t *src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const filter, const int w) { … }
// File-local vertical pass for the scaled convolve path. Arguments, as named
// in the signature: source and destination pointers with their strides, a
// table of InterpKernel filters, a starting position `y0_q4`, a per-step
// increment `y_step_q4`, and the block width and height.
// NOTE(review): the _q4 suffix presumably means 1/16-sample fixed point and
// the w16 suffix presumably means widths handled in multiples of 16 --
// confirm against the body.
static void scaledconvolve_vert_w16(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// Externally visible entry point. Arguments, as named in the signature:
// source and destination pointers with their strides, a table of InterpKernel
// filters, horizontal and vertical start positions (`x0_q4`, `y0_q4`) with
// their step increments (`x_step_q4`, `y_step_q4`), and the block width and
// height.
// NOTE(review): presumably it runs a horizontal scaledconvolve_* pass into an
// intermediate buffer followed by a vertical pass, with _q4 values in
// 1/16-sample fixed point -- confirm against the body.
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const InterpKernel *filter,
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
int w, int h) { … }
// Two file-scope expansions of FUN_CONV_2D, a macro not defined in this file.
// NOTE(review): presumably it comes from vpx_dsp/x86/convolve.h and each
// expansion emits one public 2-D convolve wrapper (likely a plain and an
// averaging variant) -- confirm against the macro definition and its
// arguments.
FUN_CONV_2D(…)
FUN_CONV_2D(…)