#include <tmmintrin.h>
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/x86/convolve.h"
#include "aom_dsp/x86/convolve_sse2.h"
#include "aom_dsp/x86/convolve_ssse3.h"
#include "aom_dsp/x86/mem_sse2.h"
#include "aom_dsp/x86/transpose_sse2.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/emmintrin_compat.h"
// File-local constant byte tables declared through DECLARE_ALIGNED with a
// first argument of 32 (presumably the requested alignment in bytes --
// confirm against the macro definition).
// NOTE(review): the initializers are not visible in this view; by their names
// these look like byte-shuffle masks for the 4-tap kernels -- TODO confirm.
DECLARE_ALIGNED(32, static const uint8_t, filt_h4[]) = …;
DECLARE_ALIGNED(32, static const uint8_t, filtd4[]) = …;
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal ("h4") 4-tap filter over 4-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pixels_per_line: read-only 8-bit source, presumably with its
//                                 row pitch.
//   output_ptr, output_pitch:     8-bit destination, presumably with its row
//                                 pitch.
//   output_height:                presumably the number of rows to produce.
//   filter:                       read-only 16-bit filter coefficients.
static void aom_filter_block1d4_h4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical ("v4") 4-tap filter over 4-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pitch:    read-only 8-bit source, presumably with its row
//                          pitch.
//   output_ptr, out_pitch: 8-bit destination, presumably with its row pitch.
//   output_height:         presumably the number of rows to produce.
//   filter:                read-only 16-bit filter coefficients.
static void aom_filter_block1d4_v4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal ("h4") 4-tap filter over 8-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pixels_per_line: read-only 8-bit source, presumably with its
//                                 row pitch.
//   output_ptr, output_pitch:     8-bit destination, presumably with its row
//                                 pitch.
//   output_height:                presumably the number of rows to produce.
//   filter:                       read-only 16-bit filter coefficients.
static void aom_filter_block1d8_h4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical ("v4") 4-tap filter over 8-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pitch:    read-only 8-bit source, presumably with its row
//                          pitch.
//   output_ptr, out_pitch: 8-bit destination, presumably with its row pitch.
//   output_height:         presumably the number of rows to produce.
//   filter:                read-only 16-bit filter coefficients.
static void aom_filter_block1d8_v4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal ("h4") 4-tap filter over 16-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pixels_per_line: read-only 8-bit source, presumably with its
//                                 row pitch.
//   output_ptr, output_pitch:     8-bit destination, presumably with its row
//                                 pitch.
//   output_height:                presumably the number of rows to produce.
//   filter:                       read-only 16-bit filter coefficients.
static void aom_filter_block1d16_h4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) { … }
// File-local 1-D filter kernel.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical ("v4") 4-tap filter over 16-pixel-wide blocks -- confirm.
// Parameter roles below are inferred from names and types only:
//   src_ptr, src_pitch:    read-only 8-bit source, presumably with its row
//                          pitch.
//   output_ptr, out_pitch: 8-bit destination, presumably with its row pitch.
//   output_height:         presumably the number of rows to produce.
//   filter:                read-only 16-bit filter coefficients.
static void aom_filter_block1d16_v4_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) { … }
// Inline helper that returns one __m128i computed from the vector(s) at `s`
// and the 16-bit coefficients at `filter`; neither input is written through.
// NOTE(review): the body is not visible in this view. The name suggests an
// 8-tap convolution yielding 8 results; how many vectors `s` must point to is
// not shown here -- confirm.
static inline __m128i shuffle_filter_convolve8_8_ssse3(
const __m128i *const s, const int16_t *const filter) { … }
// File-local helper taking a read-only 8-bit source with a stride, an 8-bit
// destination, and read-only 16-bit coefficients (`x_filter`).
// The signature carries no destination stride and no row/column count, so the
// amount of data read and written is fixed by the body.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal filter producing 8 outputs -- confirm the exact extent and the
// layout written to `dst`.
static void filter_horiz_w8_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const x_filter) { … }
// File-local helper taking a read-only 8-bit source and a writable 8-bit
// destination, each with its own stride.
// NOTE(review): the body is not visible in this view. The name suggests it
// transposes an 8x8 block of bytes from `src` into `dst` -- confirm.
static void transpose8x8_to_dst(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// File-local driver for the horizontal pass of a scaled convolution.
// NOTE(review): the body is not visible in this view. The name suggests this
// is the variant for widths handled 8 columns at a time -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   x_filters:         read-only table of InterpKernel entries.
//   x0_q4, x_step_q4:  presumably the starting position and per-output step;
//                      the _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
static void scaledconvolve_horiz_w8(const uint8_t *src,
const ptrdiff_t src_stride, uint8_t *dst,
const ptrdiff_t dst_stride,
const InterpKernel *const x_filters,
const int x0_q4, const int x_step_q4,
const int w, const int h) { … }
// File-local helper taking a read-only 8-bit source with a stride, an 8-bit
// destination, and read-only 16-bit coefficients (`filter`).
// The signature carries no destination stride and no row/column count, so the
// amount of data read and written is fixed by the body.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal filter producing 4 outputs -- confirm the exact extent and the
// layout written to `dst`.
static void filter_horiz_w4_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const filter) { … }
// File-local helper taking a read-only 8-bit source and a writable 8-bit
// destination, each with its own stride.
// NOTE(review): the body is not visible in this view. The name suggests it
// transposes a 4x4 block of bytes from `src` into `dst` -- confirm.
static void transpose4x4_to_dst(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride) { … }
// File-local driver for the horizontal pass of a scaled convolution.
// NOTE(review): the body is not visible in this view. The name suggests this
// is the variant for widths handled 4 columns at a time -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   x_filters:         read-only table of InterpKernel entries.
//   x0_q4, x_step_q4:  presumably the starting position and per-output step;
//                      the _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
static void scaledconvolve_horiz_w4(const uint8_t *src,
const ptrdiff_t src_stride, uint8_t *dst,
const ptrdiff_t dst_stride,
const InterpKernel *const x_filters,
const int x0_q4, const int x_step_q4,
const int w, const int h) { … }
// File-local helper that returns one __m128i computed from the vector(s) at
// `s` and the 16-bit coefficients at `filter`; neither input is written
// through.
// NOTE(review): the body is not visible in this view. The name suggests it is
// the shared arithmetic core of the vertical filters -- confirm, including
// how many vectors `s` must point to.
static __m128i filter_vert_kernel(const __m128i *const s,
const int16_t *const filter) { … }
// File-local helper taking a read-only 8-bit source with a stride, an 8-bit
// destination, and read-only 16-bit coefficients (`filter`).
// The signature carries no destination stride and no row count.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical filter producing 4 output pixels, presumably one row -- confirm.
static void filter_vert_w4_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const int16_t *const filter) { … }
// File-local driver for the vertical pass of a scaled convolution.
// NOTE(review): the body is not visible in this view. The name suggests this
// is the variant for 4-pixel-wide columns -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   y_filters:         read-only table of InterpKernel entries.
//   y0_q4, y_step_q4:  presumably the starting position and per-row step;
//                      the _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
static void scaledconvolve_vert_w4(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// File-local helper taking a read-only 8-bit source with a stride, an 8-bit
// destination, and read-only 16-bit coefficients (`filter`).
// The signature carries no destination stride and no row count.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical filter producing 8 output pixels, presumably one row -- confirm.
static void filter_vert_w8_ssse3(const uint8_t *const src,
const ptrdiff_t src_stride, uint8_t *const dst,
const int16_t *const filter) { … }
// File-local driver for the vertical pass of a scaled convolution.
// NOTE(review): the body is not visible in this view. The name suggests this
// is the variant for 8-pixel-wide columns -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   y_filters:         read-only table of InterpKernel entries.
//   y0_q4, y_step_q4:  presumably the starting position and per-row step;
//                      the _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
static void scaledconvolve_vert_w8(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// File-local helper taking a read-only 8-bit source with a stride, an 8-bit
// destination, read-only 16-bit coefficients (`filter`), and a width `w`.
// Unlike the w4/w8 vertical helpers in this file, it takes an explicit `w`
// and its `src` pointer variable is not const-qualified.
// NOTE(review): the body is not visible in this view. The name suggests a
// vertical filter working in 16-pixel groups, presumably over `w` columns of
// one output row -- confirm.
static void filter_vert_w16_ssse3(const uint8_t *src,
const ptrdiff_t src_stride,
uint8_t *const dst,
const int16_t *const filter, const int w) { … }
// File-local driver for the vertical pass of a scaled convolution.
// NOTE(review): the body is not visible in this view. The name suggests this
// is the variant for widths handled 16 columns at a time -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   y_filters:         read-only table of InterpKernel entries.
//   y0_q4, y_step_q4:  presumably the starting position and per-row step;
//                      the _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
static void scaledconvolve_vert_w16(
const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst,
const ptrdiff_t dst_stride, const InterpKernel *const y_filters,
const int y0_q4, const int y_step_q4, const int w, const int h) { … }
// Externally visible entry point (the only non-static function definition in
// this view).
// NOTE(review): the body is not visible in this view. The name suggests a 2-D
// scaled convolution, presumably built on the scaledconvolve_horiz_* and
// scaledconvolve_vert_* helpers in this file -- confirm.
// Parameter roles below are inferred from names and types only:
//   src, src_stride:   read-only 8-bit source, presumably with its row stride.
//   dst, dst_stride:   8-bit destination, presumably with its row stride.
//   filter:            read-only table of InterpKernel entries.
//   x0_q4, x_step_q4:  presumably horizontal start position and step.
//   y0_q4, y_step_q4:  presumably vertical start position and step.
//                      The _q4 suffix presumably means 4 fractional bits.
//   w, h:              presumably the output width and height in pixels.
void aom_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const InterpKernel *filter,
int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
int w, int h) { … }
// Declarations (no definitions here) of 1-D filter kernels, each of type
// filter8_1dfunction.
// NOTE(review): by their names these are the 8-tap ("v8"/"h8") and 2-tap
// ("v2"/"h2") kernels for 16-, 8- and 4-pixel-wide blocks, presumably
// implemented in another translation unit -- confirm where they are defined.
filter8_1dfunction aom_filter_block1d16_v8_ssse3;
filter8_1dfunction aom_filter_block1d16_h8_ssse3;
filter8_1dfunction aom_filter_block1d8_v8_ssse3;
filter8_1dfunction aom_filter_block1d8_h8_ssse3;
filter8_1dfunction aom_filter_block1d4_v8_ssse3;
filter8_1dfunction aom_filter_block1d4_h8_ssse3;
filter8_1dfunction aom_filter_block1d16_v2_ssse3;
filter8_1dfunction aom_filter_block1d16_h2_ssse3;
filter8_1dfunction aom_filter_block1d8_v2_ssse3;
filter8_1dfunction aom_filter_block1d8_h2_ssse3;
filter8_1dfunction aom_filter_block1d4_v2_ssse3;
filter8_1dfunction aom_filter_block1d4_h2_ssse3;
// Two FUN_CONV_1D macro invocations; their arguments are elided in this view
// and the macro is defined outside it.
// NOTE(review): presumably each expands to a public 1-D convolve entry point
// (one horizontal, one vertical) that dispatches to the
// aom_filter_block1d*_ssse3 kernels declared in this file -- confirm against
// the macro definition.
FUN_CONV_1D(…)
FUN_CONV_1D(…)