#include <tmmintrin.h>
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/intrapred_common.h"
// Helper returning one __m128i from three vectors passed by pointer
// (left, top, topleft). By name it yields one row of 8 Paeth-predicted
// pixels -- presumably; the body is elided in this view, TODO confirm.
static inline __m128i paeth_8x1_pred(const __m128i *left, const __m128i *top,
const __m128i *topleft) { … }
// By name: SSSE3 Paeth intra predictor for a 4x4 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 4x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 4x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for an 8x4 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for an 8x8 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for an 8x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for an 8x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// Helper returning one __m128i from four vectors passed by pointer
// (left, top0, top1, topleft). By name it yields one row of 16
// Paeth-predicted pixels; top0/top1 presumably carry the two halves of the
// top row -- the body is elided in this view, TODO confirm.
static inline __m128i paeth_16x1_pred(const __m128i *left, const __m128i *top0,
const __m128i *top1,
const __m128i *topleft) { … }
// By name: SSSE3 Paeth intra predictor for a 16x4 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 16x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 16x16 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 16x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 16x64 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 32x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 32x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 32x32 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 32x64 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 64x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 64x64 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 Paeth intra predictor for a 64x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: loads neighbor pixels for width-4 smooth prediction into the
// caller-provided `pixels` vector(s). `height` presumably controls how much
// of `left` is read and how many vectors are filled -- body elided in this
// view; TODO confirm the layout of `pixels`.
static inline void load_pixel_w4(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) { … }
// By name: fills weight_h / weight_w with the smooth-prediction weights for
// a width-4 block of the given height. The weight source and the number of
// vectors written are not visible (body elided) -- TODO confirm.
static inline void load_weight_w4(int height, __m128i *weight_h,
__m128i *weight_w) { … }
// By name: emits `h` rows of a 4-wide smooth prediction to dst (row pitch
// `stride`) from preloaded pixels and height/width weights (wh, ww).
// NOTE(review): `second_half` presumably selects the lower half of a taller
// block -- body elided in this view; confirm.
static inline void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) { … }
// By name: SSSE3 smooth intra predictor for a 4x4 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 4x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 4x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: loads neighbor pixels for width-8 smooth prediction into the
// caller-provided `pixels` vector(s). `height` presumably controls how much
// of `left` is read and how many vectors are filled -- body elided in this
// view; TODO confirm the layout of `pixels`.
static inline void load_pixel_w8(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) { … }
// By name: fills weight_h / weight_w with the smooth-prediction weights for
// a width-8 block of the given height. The weight source and the number of
// vectors written are not visible (body elided) -- TODO confirm.
static inline void load_weight_w8(int height, __m128i *weight_h,
__m128i *weight_w) { … }
// By name: emits `h` rows of an 8-wide smooth prediction to dst (row pitch
// `stride`) from preloaded pixels and height/width weights (wh, ww).
// NOTE(review): `second_half` presumably selects the lower half of a taller
// block -- body elided in this view; confirm.
static inline void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) { … }
// By name: SSSE3 smooth intra predictor for an 8x4 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for an 8x8 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for an 8x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for an 8x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// LIBAOM_RESTRICT is applied to pointer parameters later in this file.
// When _MSC_VER is defined it expands to nothing; the expansion used for
// other compilers is elided in this view (presumably a restrict-style
// no-alias qualifier -- TODO confirm).
#if defined(_MSC_VER)
#define LIBAOM_RESTRICT
#else
#define LIBAOM_RESTRICT …
#endif
// By name: loads 4 bytes from src into an __m128i. Contents of the remaining
// lanes and any alignment requirement are not visible (body elided) --
// TODO confirm.
static AOM_FORCE_INLINE __m128i Load4(const void *src) { … }
// By name: loads 8 bytes from `a` into the low half of an __m128i. Contents
// of the high half and any alignment requirement are not visible (body
// elided) -- TODO confirm.
static AOM_FORCE_INLINE __m128i LoadLo8(const void *a) { … }
// By name: loads 16 bytes from `a` into an __m128i without requiring 16-byte
// alignment -- presumably; body elided in this view, TODO confirm.
static AOM_FORCE_INLINE __m128i LoadUnaligned16(const void *a) { … }
// By name: stores 4 bytes of x to dst (presumably the lowest 32 bits) --
// body elided in this view, TODO confirm.
static AOM_FORCE_INLINE void Store4(void *dst, const __m128i x) { … }
// By name: stores the low 8 bytes of v to `a` -- presumably; body elided in
// this view, TODO confirm.
static AOM_FORCE_INLINE void StoreLo8(void *a, const __m128i v) { … }
// By name: stores all 16 bytes of v to `a` without requiring 16-byte
// alignment -- presumably; body elided in this view, TODO confirm.
static AOM_FORCE_INLINE void StoreUnaligned16(void *a, const __m128i v) { … }
// Name mirrors the SSE4.1 intrinsic _mm_cvtepu8_epi16 (zero-extend unsigned
// 8-bit lanes to 16-bit). NOTE(review): presumably an equivalent built from
// instructions available to this SSSE3 file -- body elided; confirm.
static AOM_FORCE_INLINE __m128i cvtepu8_epi16(__m128i x) { … }
// Name mirrors the SSE4.1 intrinsic _mm_cvtepu8_epi32 (zero-extend unsigned
// 8-bit lanes to 32-bit). NOTE(review): presumably an equivalent built from
// instructions available to this SSSE3 file -- body elided; confirm.
static AOM_FORCE_INLINE __m128i cvtepu8_epi32(__m128i x) { … }
// Name mirrors the SSE4.1 intrinsic _mm_cvtepu16_epi32 (zero-extend unsigned
// 16-bit lanes to 32-bit). NOTE(review): presumably an equivalent built from
// instructions available to this SSSE3 file -- body elided; confirm.
static AOM_FORCE_INLINE __m128i cvtepu16_epi32(__m128i x) { … }
// File-local smooth predictor taking the block dimensions as explicit
// `width` and `height` arguments. dst, top_row and left_column are all
// LIBAOM_RESTRICT-qualified. Presumably writes a width x height block to dst
// (row pitch `stride`); supported size ranges are not visible -- body
// elided in this view, TODO confirm.
static void smooth_predictor_wxh(uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column,
int width, int height) { … }
// By name: SSSE3 smooth intra predictor for a 16x4 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 16x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 16x16 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 16x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 16x64 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 32x8 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 32x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 32x32 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 32x64 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 64x16 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 64x32 block (presumably WxH).
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: SSSE3 smooth intra predictor for a 64x64 block.
// Presumably writes the block to dst (row pitch `stride`) using the `above`
// and `left` neighbor pixels -- body elided in this view; TODO confirm.
void aom_smooth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) { … }
// By name: forms a one-directional smooth weighted sum for 16 pixels and
// stores the result at dst. Pixels, weights and scaled corner values arrive
// by value in two vectors each (...1 / ...2) -- presumably the low and high
// 8 lanes; `round` is presumably the rounding term added before the final
// shift. Body elided in this view; TODO confirm.
static AOM_FORCE_INLINE void write_smooth_directional_sum16(
uint8_t *LIBAOM_RESTRICT dst, const __m128i pixels1, const __m128i pixels2,
const __m128i weights1, const __m128i weights2,
const __m128i scaled_corner1, const __m128i scaled_corner2,
const __m128i round) { … }
// By name: returns a one-directional smooth weighted sum for 8 pixels,
// combining `pixels`, `weights` and `scaled_corner`. Whether rounding or
// shifting is applied here is not visible (body elided) -- TODO confirm.
static AOM_FORCE_INLINE __m128i smooth_directional_sum8(
const __m128i pixels, const __m128i weights, const __m128i scaled_corner) { … }
// By name: forms a one-directional smooth weighted sum for 8 pixels and
// stores the result at dst. All vector inputs are passed by pointer;
// `round` is presumably the rounding term added before the final shift.
// Body elided in this view; TODO confirm.
static AOM_FORCE_INLINE void write_smooth_directional_sum8(
uint8_t *LIBAOM_RESTRICT dst, const __m128i *pixels, const __m128i *weights,
const __m128i *scaled_corner, const __m128i *round) { … }
// By name: loads the neighbor pixels needed for 4-wide vertical-smooth
// prediction into `pixels`. How `height` affects which entries of `left`
// and `above` are read is not visible (body elided) -- TODO confirm.
static AOM_FORCE_INLINE void load_smooth_vertical_pixels4(
const uint8_t *LIBAOM_RESTRICT above, const uint8_t *LIBAOM_RESTRICT left,
const int height, __m128i *pixels) { … }
// By name: loads vertical-smooth weights for a 4-wide block of the given
// height from `weight_array` into `weights`. The offset into weight_array
// and the number of vectors written are not visible (body elided) --
// TODO confirm.
static AOM_FORCE_INLINE void load_smooth_vertical_weights4(
const uint8_t *LIBAOM_RESTRICT weight_array, const int height,
__m128i *weights) { … }
// By name: writes `height` rows of a 4-wide vertical-smooth prediction to
// dst (row pitch `stride`) from preloaded `pixel` and `weight` vectors --
// presumably; body elided in this view, TODO confirm.
static AOM_FORCE_INLINE void write_smooth_vertical4xh(
const __m128i *pixel, const __m128i *weight, const int height,
uint8_t *LIBAOM_RESTRICT dst, const ptrdiff_t stride) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 4x4 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_v_predictor_4x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 4x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_4x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 4x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_4x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 8x4 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_8x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 8x8 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_v_predictor_8x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 8x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_8x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 8x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_8x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 16x4 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_16x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 16x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_16x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 16x16 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_v_predictor_16x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 16x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_16x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 16x64 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_16x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 32x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_32x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 32x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_32x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 32x32 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_v_predictor_32x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 32x64 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_32x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 64x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_64x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 64x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_v_predictor_64x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 vertical-smooth (smooth_v) intra predictor, 64x64 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_v_predictor_64x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: forms the horizontal-smooth weighted sum for 4 pixels of one row
// and stores it at dst. left_y is presumably the left-column pixel for the
// current row, scaled_top_right the pre-weighted top-right pixel, and round
// the rounding term -- body elided in this view; TODO confirm.
static AOM_FORCE_INLINE void write_smooth_horizontal_sum4(
uint8_t *LIBAOM_RESTRICT dst, const __m128i *left_y, const __m128i *weights,
const __m128i *scaled_top_right, const __m128i *round) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 4x4 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_h_predictor_4x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 4x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_4x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 4x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_4x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 8x4 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_8x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 8x8 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_h_predictor_8x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 8x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_8x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 8x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_8x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 16x4 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_16x4_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 16x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_16x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 16x16 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_h_predictor_16x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 16x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_16x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 16x64 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_16x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 32x8 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_32x8_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 32x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_32x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 32x32 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_h_predictor_32x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 32x64 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_32x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 64x16 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_64x16_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 64x32 block
// (presumably WxH). All pointer parameters are LIBAOM_RESTRICT-qualified.
// Presumably writes the block to dst (row pitch `stride`) from top_row /
// left_column -- body elided in this view; TODO confirm.
void aom_smooth_h_predictor_64x32_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }
// By name: SSSE3 horizontal-smooth (smooth_h) intra predictor, 64x64 block.
// All pointer parameters are LIBAOM_RESTRICT-qualified. Presumably writes
// the block to dst (row pitch `stride`) from top_row / left_column -- body
// elided in this view; TODO confirm.
void aom_smooth_h_predictor_64x64_ssse3(
uint8_t *LIBAOM_RESTRICT dst, ptrdiff_t stride,
const uint8_t *LIBAOM_RESTRICT top_row,
const uint8_t *LIBAOM_RESTRICT left_column) { … }