#ifndef VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_
#define VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_
#include <emmintrin.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/inv_txfm.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
// Takes one vector `in` by value and returns nothing; results are presumably
// delivered through `out`.
// NOTE(review): body not visible here. The name suggests the lanes of `in` are
// widened to 64 bits, which would need more than one output vector - confirm
// how many __m128i elements `out` must point to.
static INLINE void extend_64bit(const __m128i in,
__m128i *const out ) { … }
// Combines two input vectors and a `rounding` vector into a single returned
// __m128i.
// NOTE(review): body not visible here. Presumably adds `rounding` to each
// input, shifts right by 4 and packs both into 16-bit lanes - verify the
// shift kind and whether the pack saturates.
static INLINE __m128i wraplow_16bit_shift4(const __m128i in0, const __m128i in1,
const __m128i rounding) { … }
// Combines two input vectors and a `rounding` vector into a single returned
// __m128i.
// NOTE(review): body not visible here. Presumably adds `rounding` to each
// input, shifts right by 5 and packs both into 16-bit lanes - verify the
// shift kind and whether the pack saturates.
static INLINE __m128i wraplow_16bit_shift5(const __m128i in0, const __m128i in1,
const __m128i rounding) { … }
// Maps one vector to one vector.
// NOTE(review): body not visible here. The name suggests a DCT-constant
// rounding add followed by a right shift applied to 64-bit lanes - confirm
// the rounding constant, shift amount and lane width.
static INLINE __m128i dct_const_round_shift_64bit(const __m128i in) { … }
// Merges two input vectors into one returned vector.
// NOTE(review): body not visible here. Presumably gathers four values (two
// from each input) into a single register - confirm which lanes are kept and
// their order in the result.
static INLINE __m128i pack_4(const __m128i in0, const __m128i in1) { … }
// Takes one vector `in` by value and returns nothing; results are presumably
// delivered through the `out` and `sign` pointers.
// NOTE(review): body not visible here. The name suggests `out` receives the
// absolute values widened to 64-bit lanes and `sign` receives matching sign
// masks - confirm the number of elements each pointer must address.
static INLINE void abs_extend_64bit_sse2(const __m128i in,
__m128i *const out ,
__m128i *const sign ) { … }
// Produces one vector from a value vector `in`, a `sign` vector and a `cospi`
// coefficient vector.
// NOTE(review): body not visible here. Presumably multiplies `in` by `cospi`
// and then re-applies the sign described by `sign` - confirm lane width and
// the encoding expected in `sign`.
static INLINE __m128i multiply_apply_sign_sse2(const __m128i in,
const __m128i sign,
const __m128i cospi) { … }
// Produces one vector from read-only arrays `in` and `sign` and an integer
// constant `c`.
// NOTE(review): body not visible here. Presumably multiplies the values in
// `in` by `c`, restores signs from `sign`, then rounds, shifts and packs the
// result - confirm how many elements `in` and `sign` must each provide.
static INLINE __m128i multiplication_round_shift_sse2(
const __m128i *const in , const __m128i *const sign ,
const int c) { … }
// Produces one vector from read-only arrays `in` and `sign` and an integer
// constant `c`.
// NOTE(review): body not visible here. The name suggests this is the negated
// counterpart of multiplication_round_shift_sse2 - confirm where the negation
// is applied and how many elements `in` and `sign` must each provide.
static INLINE __m128i multiplication_neg_round_shift_sse2(
const __m128i *const in , const __m128i *const sign ,
const int c) { … }
// Takes two input vectors and two integer constants; returns nothing, with
// results presumably written to *out0 and *out1.
// NOTE(review): body not visible here. Presumably a rotation-style butterfly
// combining in0/in1 with c0/c1 - confirm the exact sign convention of each
// output before relying on it.
static INLINE void highbd_butterfly_sse2(const __m128i in0, const __m128i in1,
const int c0, const int c1,
__m128i *const out0,
__m128i *const out1) { … }
// Takes a single input vector and two integer constants; returns nothing,
// with results presumably written to *out0 and *out1.
// NOTE(review): body not visible here. Presumably the butterfly specialised
// for the case where the second input is zero - confirm which constant feeds
// which output.
static INLINE void highbd_partial_butterfly_sse2(const __m128i in, const int c0,
const int c1,
__m128i *const out0,
__m128i *const out1) { … }
// Takes a single input vector and two integer constants; returns nothing,
// with results presumably written to *out0 and *out1.
// NOTE(review): body not visible here. The name suggests a single-input
// butterfly with a negated term - confirm which output is negated and which
// constant feeds which output.
static INLINE void highbd_partial_butterfly_neg_sse2(const __m128i in,
const int c0, const int c1,
__m128i *const out0,
__m128i *const out1) { … }
// Takes two input vectors and no explicit constants; returns nothing, with
// results presumably written to *out0 and *out1.
// NOTE(review): body not visible here. The name suggests a butterfly with
// the coefficient fixed to the cospi_16 constant - confirm which output holds
// the sum term and which the difference term.
static INLINE void highbd_butterfly_cospi16_sse2(const __m128i in0,
const __m128i in1,
__m128i *const out0,
__m128i *const out1) { … }
// Reads from the array `in` and presumably writes to the array `out`, with
// `size` controlling how many elements are processed.
// NOTE(review): body not visible here. Presumably forms sums and differences
// of mirrored elements of `in` - confirm the pairing, whether `size` counts
// inputs or outputs, and whether `in` and `out` may alias.
static INLINE void highbd_add_sub_butterfly(const __m128i *in, __m128i *out,
int size) { … }
// Reads from the array `in` and returns nothing; results are presumably
// written to the array `out`.
// NOTE(review): body not visible here. The name suggests the fourth stage of
// an 8-point inverse DCT, which would involve 8 vectors on each side -
// confirm the required array lengths.
static INLINE void highbd_idct8_stage4(const __m128i *const in,
__m128i *const out) { … }
// Operates on the array `io`, which is the only parameter and is not
// const-qualified, so results are presumably written back in place.
// NOTE(review): body not visible here. Presumably applies the final rounding
// and shift of the 8x8 inverse transform - confirm the number of vectors in
// `io` and the layout left behind.
static INLINE void highbd_idct8x8_final_round(__m128i *const io) { … }
// Reads from the array `in` and returns nothing; results are presumably
// written to the array `out`.
// NOTE(review): body not visible here. The name suggests the seventh stage of
// a 16-point inverse DCT processed four columns at a time - confirm the
// required array lengths (likely 16 vectors each).
static INLINE void highbd_idct16_4col_stage7(const __m128i *const in,
__m128i *const out) { … }
// Produces one vector from two input vectors and a bit depth `bd`.
// NOTE(review): body not visible here. Presumably adds in0 and in1 per lane
// and clamps the sum to the pixel range implied by `bd` (0 to (1 << bd) - 1)
// - confirm lane width and which add intrinsic is used.
static INLINE __m128i add_clamp(const __m128i in0, const __m128i in1,
const int bd) { … }
// Takes read-only coefficients `input`, a writable 16-bit destination `dest`
// with row pitch `stride`, a bit depth `bd` and a block dimension `size`.
// NOTE(review): body not visible here. Presumably the DC-only inverse
// transform path: derives one value from input[0] and adds it to a
// size x size block of `dest` with clamping - confirm supported sizes.
static INLINE void highbd_idct_1_add_kernel(const tran_low_t *input,
uint16_t *dest, int stride, int bd,
const int size) { … }
// Takes a residual vector `in`, a writable 16-bit destination `dest` and a
// bit depth `bd`; returns nothing.
// NOTE(review): body not visible here. Presumably adds 4 residual values to
// the 4 pixels at `dest`, clamps to `bd` bits and stores them back - confirm
// which lanes of `in` are used.
static INLINE void recon_and_store_4(const __m128i in, uint16_t *const dest,
const int bd) { … }
// Takes a residual vector `in`, a writable 16-bit destination `dest` with row
// pitch `stride`, and a bit depth `bd`; returns nothing.
// NOTE(review): body not visible here. Presumably reconstructs two rows of 4
// pixels (at `dest` and `dest + stride`) from the two halves of `in` -
// confirm which half maps to which row.
static INLINE void recon_and_store_4x2(const __m128i in, uint16_t *const dest,
const int stride, const int bd) { … }
// Takes a read-only array of residual vectors `in`, a writable 16-bit
// destination `dest` with row pitch `stride`, and a bit depth `bd`.
// NOTE(review): body not visible here. Presumably reconstructs a 4x4 pixel
// block - confirm how many vectors `in` must hold and how rows are split
// across them.
static INLINE void recon_and_store_4x4(const __m128i *const in, uint16_t *dest,
const int stride, const int bd) { … }
// Takes a residual vector `in`, a pointer to the destination pointer `dest`,
// a row pitch `stride` and a bit depth `bd`; returns nothing.
// NOTE(review): body not visible here. `dest` is passed by address, so
// presumably *dest is advanced by `stride` after one row of 8 pixels is
// reconstructed and stored - confirm against callers.
static INLINE void recon_and_store_8(const __m128i in, uint16_t **const dest,
const int stride, const int bd) { … }
// Takes a read-only array of residual vectors `in`, a writable 16-bit
// destination `dest` with row pitch `stride`, and a bit depth `bd`.
// NOTE(review): body not visible here. Presumably reconstructs an 8x8 pixel
// block, one vector of `in` per row - confirm that `in` must hold 8 vectors.
static INLINE void recon_and_store_8x8(const __m128i *const in, uint16_t *dest,
const int stride, const int bd) { … }
// Reads coefficients from `input` and returns a single vector.
// NOTE(review): body not visible here. Presumably loads 8 consecutive 32-bit
// coefficients and packs them into eight 16-bit lanes - confirm alignment
// requirements on `input` and whether the pack saturates.
static INLINE __m128i load_pack_8_32bit(const tran_low_t *const input) { … }
// Reads coefficients from `input` using row pitch `stride` and returns
// nothing; results are presumably written to the array `in`.
// NOTE(review): body not visible here. Presumably loads an 8x8 block of
// 32-bit coefficients, packs each row to 16-bit lanes and transposes it -
// confirm that `in` must hold 8 vectors.
static INLINE void highbd_load_pack_transpose_32bit_8x8(const tran_low_t *input,
const int stride,
__m128i *const in) { … }
// Reads coefficients from `input` using row pitch `stride` and returns
// nothing; results are presumably written to the array `in`.
// NOTE(review): body not visible here. Presumably loads an 8x4 region of
// 32-bit coefficients and transposes it without narrowing - confirm the
// orientation of "8x4" and how many vectors `in` must hold.
static INLINE void highbd_load_transpose_32bit_8x4(const tran_low_t *input,
const int stride,
__m128i *in) { … }
// Reads coefficients from `input` using row pitch `stride` and returns
// nothing; results are presumably written to the array `in`.
// NOTE(review): body not visible here. Presumably loads a 4x4 block of
// 32-bit coefficients and transposes it, one row per vector - confirm that
// `in` must hold 4 vectors.
static INLINE void highbd_load_transpose_32bit_4x4(const tran_low_t *input,
const int stride,
__m128i *in) { … }
// Takes a writable 16-bit destination `dest`, a vector `in` and a bit depth
// `bd`; returns nothing.
// NOTE(review): body not visible here. Presumably rounds/shifts `in`, adds
// it to the 8 pixels at `dest`, clamps to `bd` bits and stores the result -
// confirm the rounding applied before the add.
static INLINE void highbd_write_buffer_8(uint16_t *dest, const __m128i in,
const int bd) { … }
// Takes a writable 16-bit destination `dest`, a vector `in` and a bit depth
// `bd`; returns nothing.
// NOTE(review): body not visible here. Presumably rounds/shifts `in`, adds
// it to the 4 pixels at `dest`, clamps to `bd` bits and stores the result -
// confirm the rounding applied and which lanes of `in` are used.
static INLINE void highbd_write_buffer_4(uint16_t *const dest, const __m128i in,
const int bd) { … }
#endif