#ifndef AOM_AOM_DSP_X86_BLEND_MASK_SSE4_H_
#define AOM_AOM_DSP_X86_BLEND_MASK_SSE4_H_
#include <smmintrin.h>
#include <assert.h>
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_dsp/x86/synonyms.h"
#include "config/aom_dsp_rtcd.h"
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the per-row SSE4.1 kernel for the "d16" A64 mask blend, handling
// a 4-pixel-wide group.
//   dst            - the only non-const pointer parameter; presumably receives
//                    the blended 8-bit pixels.
//   src0, src1     - the two CONV_BUF_TYPE inputs to be blended.
//   m              - presumably the mask weights, already loaded into a
//                    vector by the caller.
//   v_round_offset - presumably a rounding/offset term applied before the
//                    final shift.
//   v_maxval       - presumably the maximum mask weight, used to derive the
//                    complementary weight for src1 -- TODO confirm.
//   shift          - presumably the right-shift applied to the weighted sum.
static inline void blend_a64_d16_mask_w4_sse41(
uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
const __m128i *m, const __m128i *v_round_offset, const __m128i *v_maxval,
int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the per-row SSE4.1 kernel for the "d16" A64 mask blend, handling
// an 8-pixel-wide group.
//   dst            - the only non-const pointer parameter; presumably receives
//                    the blended 8-bit pixels.
//   src0, src1     - the two CONV_BUF_TYPE inputs to be blended.
//   m              - presumably the mask weights, already loaded into a
//                    vector by the caller.
//   v_round_offset - presumably a rounding/offset term applied before the
//                    final shift.
//   v_maxval       - presumably the maximum mask weight, used to derive the
//                    complementary weight for src1 -- TODO confirm.
//   shift          - presumably the right-shift applied to the weighted sum.
static inline void blend_a64_d16_mask_w8_sse41(
uint8_t *dst, const CONV_BUF_TYPE *src0, const CONV_BUF_TYPE *src1,
const __m128i *m, const __m128i *v_round_offset, const __m128i *v_maxval,
int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 4-wide blocks where the
// mask is not subsampled in either direction (subw0, subh0).
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result -- TODO confirm.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
// Stride units (elements vs. bytes) are not visible here -- verify at callers.
static inline void aom_lowbd_blend_a64_d16_mask_subw0_subh0_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 8-wide column groups
// where the mask is not subsampled in either direction (subw0, subh0).
// Unlike some sibling signatures in this header, there is no width parameter;
// whether this handles exactly 8 columns per call is not visible here.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result -- TODO confirm.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw0_subh0_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 4-wide blocks where the
// mask is subsampled both horizontally and vertically (subw1, subh1), i.e.
// the mask is likely stored at twice the block resolution in each direction
// and reduced before blending -- TODO confirm the exact reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw1_subh1_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 8-wide column groups
// where the mask is subsampled both horizontally and vertically (subw1,
// subh1), i.e. the mask is likely stored at twice the block resolution in
// each direction and reduced before blending -- TODO confirm the exact
// reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw1_subh1_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 4-wide blocks where the
// mask is subsampled horizontally only (subw1, subh0), i.e. the mask likely
// has twice as many columns as the block and one mask row per output row --
// TODO confirm the exact reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw1_subh0_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 8-wide column groups
// where the mask is subsampled horizontally only (subw1, subh0), i.e. the
// mask likely has twice as many columns as the block and one mask row per
// output row -- TODO confirm the exact reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw1_subh0_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 4-wide blocks where the
// mask is subsampled vertically only (subw0, subh1), i.e. the mask likely has
// two rows per output row and the same number of columns as the block --
// TODO confirm the exact reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw0_subh1_w4_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
// NOTE(review): the body of this function is elided in this view; everything
// below is inferred from the name and signature only -- confirm against the
// implementation before relying on it.
//
// Presumably the low-bitdepth d16 A64 mask blend for 8-wide column groups
// where the mask is subsampled vertically only (subw0, subh1), i.e. the mask
// likely has two rows per output row and the same number of columns as the
// block -- TODO confirm the exact reduction.
//   dst, dst_stride    - output buffer (the only non-const pointer) and its
//                        stride.
//   src0, src0_stride  - first CONV_BUF_TYPE input and its stride.
//   src1, src1_stride  - second CONV_BUF_TYPE input and its stride.
//   mask, mask_stride  - 8-bit mask buffer and its stride.
//   h                  - presumably the number of output rows to process.
//   round_offset       - pointer to a vector; presumably the rounding offset
//                        used when narrowing the result.
//   shift              - presumably the right-shift applied to the blended
//                        sum.
static inline void aom_lowbd_blend_a64_d16_mask_subw0_subh1_w8_sse4_1(
uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
const __m128i *round_offset, int shift) { … }
#endif