#ifndef VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_
#define VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_
#include <emmintrin.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/inv_txfm.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
// Takes a read-only pointer to __m128i vectors (`in`) and a writable pointer
// to __m128i vectors (`out`); returns nothing.
// NOTE(review): the body is not visible here. The name suggests a 4x8
// transpose of 16-bit lanes for the 8x8 "12" inverse-DCT path -- confirm the
// number of vectors read from `in` and written to `out` against the body.
static INLINE void idct8x8_12_transpose_16bit_4x8(const __m128i *const in,
__m128i *const out) { … }
// Maps one __m128i value (`in`) to one __m128i result.
// NOTE(review): the body is not visible here. Presumably this is the vector
// form of the scalar DCT rounding shift (add a rounding constant, then shift
// right) -- confirm the lane width and shift amount against the body.
static INLINE __m128i dct_const_round_shift_sse2(const __m128i in) { … }
// Combines a data vector (`in`) with a constant vector (`cospi`) and returns
// one __m128i result.
// NOTE(review): the body is not visible here. The name suggests a
// multiply-add of `in` with `cospi` followed by a rounding shift -- confirm
// the expected lane layout of both operands against the body.
static INLINE __m128i idct_madd_round_shift_sse2(const __m128i in,
const __m128i cospi) { … }
// Takes three __m128i values (`in0`, `in1`, `x`) and returns one __m128i.
// NOTE(review): the body is not visible here. Presumably `in0`/`in1` are two
// data vectors each combined with the constant vector `x`, with the results
// packed into one output vector -- confirm against the body.
static INLINE __m128i idct_calc_wraplow_sse2(const __m128i in0,
const __m128i in1,
const __m128i x) { … }
// Takes two input vectors (`in0`, `in1`) and two integer constants (`c0`,
// `c1`); results are returned through the `out0` and `out1` pointers.
// NOTE(review): the body is not visible here. Presumably a rotation-style
// butterfly where `c0`/`c1` are cosine constants -- confirm which
// combination lands in `out0` versus `out1`, and whether `c0`/`c1` must fit
// in 16 bits, against the body.
static INLINE void butterfly(const __m128i in0, const __m128i in1, const int c0,
const int c1, __m128i *const out0,
__m128i *const out1) { … }
// Maps a single input vector (`in`) to a single __m128i result.
// NOTE(review): the body is not visible here. The name suggests a butterfly
// specialised for the cospi_16_64 constant -- confirm against the body.
static INLINE __m128i butterfly_cospi16(const __m128i in) { … }
// Takes a read-only pointer to tran_low_t coefficients (`data`) and returns
// one __m128i.
// NOTE(review): the body is not visible here. Presumably loads 4
// coefficients starting at `data`; how tran_low_t is narrowed (if at all)
// and any alignment requirement must be confirmed against the body.
static INLINE __m128i load_input_data4(const tran_low_t *data) { … }
// Takes a read-only pointer to tran_low_t coefficients (`data`) and returns
// one __m128i.
// NOTE(review): the body is not visible here. Presumably loads 8
// coefficients starting at `data`; how tran_low_t is narrowed (if at all)
// and any alignment requirement must be confirmed against the body.
static INLINE __m128i load_input_data8(const tran_low_t *data) { … }
// Takes a read-only coefficient pointer (`input`), an integer `stride`, and a
// writable pointer to __m128i vectors (`in`); returns nothing.
// NOTE(review): the body is not visible here. The name suggests loading an
// 8x8 block of coefficients (rows `stride` elements apart) and storing its
// 16-bit transpose into in[0..7] -- confirm against the body.
static INLINE void load_transpose_16bit_8x8(const tran_low_t *input,
const int stride,
__m128i *const in) { … }
// Takes a writable byte pointer (`dest`) and one vector (`in_x`); returns
// nothing.
// NOTE(review): the body is not visible here. Presumably adds the residual
// in `in_x` to the pixels at `dest` and writes the result back -- confirm
// how many bytes are read/written against the body.
static INLINE void recon_and_store(uint8_t *const dest, const __m128i in_x) { … }
// Takes a read-only pointer to __m128i vectors (`in`) and a writable pointer
// to __m128i vectors (`out`); returns nothing.
// NOTE(review): the body is not visible here. The name suggests a rounding
// shift applied to 8 vectors (an 8x8 block) -- confirm the shift amount and
// whether `in` and `out` may alias against the body.
static INLINE void round_shift_8x8(const __m128i *const in,
__m128i *const out) { … }
// Takes a read-only pointer to __m128i vectors (`in`), a writable byte
// pointer (`dest`) and an integer `stride`; returns nothing.
// NOTE(review): the body is not visible here. Presumably rounds the 8x8
// residual in `in` and adds it into 8 rows of `dest`, rows `stride` bytes
// apart -- confirm against the body.
static INLINE void write_buffer_8x8(const __m128i *const in,
uint8_t *const dest, const int stride) { … }
// Takes a read-only pointer to __m128i vectors (`in`), a writable byte
// pointer (`dest`) and an integer `stride`; returns nothing.
// NOTE(review): the body is not visible here. The name suggests
// reconstructing a 4x4 block into `dest` (rows `stride` bytes apart) --
// confirm how many vectors of `in` are consumed against the body.
static INLINE void recon_and_store4x4_sse2(const __m128i *const in,
uint8_t *const dest,
const int stride) { … }
// Takes a non-const pointer to __m128i vectors (`in`), a writable byte
// pointer (`dst`) and an integer `stride`; returns nothing.
// NOTE(review): the body is not visible here. `in` is not const-qualified,
// so the body may modify it in place; presumably 32 vectors are rounded and
// added into an 8-wide, 32-row region of `dst` -- confirm against the body.
static INLINE void store_buffer_8x32(__m128i *in, uint8_t *dst, int stride) { … }
// Takes a writable byte pointer (`dest`) and one vector (`in`); returns
// nothing.
// NOTE(review): the body is not visible here. Presumably rounds one row of 8
// residuals and adds it into 8 pixels at `dest` -- confirm against the body.
static INLINE void write_buffer_8x1(uint8_t *const dest, const __m128i in) { … }
// Takes a read-only pointer to __m128i vectors (`in`), a writable pointer to
// __m128i vectors (`out`) and an integer `size`; returns nothing.
// NOTE(review): the body is not visible here. Presumably an add/subtract-only
// butterfly stage over `size` vectors -- confirm which values of `size` are
// supported and the output ordering against the body.
static INLINE void add_sub_butterfly(const __m128i *in, __m128i *out,
int size) { … }
// Takes a read-only pointer to __m128i vectors (`in`) and a writable pointer
// to __m128i vectors (`out`); returns nothing.
// NOTE(review): the body is not visible here. Presumably an 8-point inverse
// DCT over 8 input vectors producing 8 output vectors -- confirm the array
// lengths and whether `in`/`out` may alias against the body.
static INLINE void idct8(const __m128i *const in ,
__m128i *const out ) { … }
// Takes a single writable pointer to __m128i vectors (`io`); returns nothing.
// NOTE(review): the body is not visible here. The `io` name suggests the
// buffer is both input and output (updated in place); presumably the kernel
// of the 8x8 inverse DCT for the "12 non-zero coefficients" case -- confirm
// the required length of `io` against the body.
static INLINE void idct8x8_12_add_kernel_sse2(__m128i *const io ) { … }
// Takes a read-only pointer to __m128i vectors (`in`) and a writable pointer
// to __m128i vectors (`out`); returns nothing.
// NOTE(review): the body is not visible here. The name suggests a 16-point
// inverse DCT applied to 8 columns at once (16 vectors in, 16 out) --
// confirm the array lengths against the body.
static INLINE void idct16_8col(const __m128i *const in ,
__m128i *const out ) { … }
// Takes a read-only pointer to __m128i vectors (`input`) and a writable
// pointer to __m128i vectors (`output`); returns nothing.
// NOTE(review): the body is not visible here. Presumably the first (row)
// pass of the 16x16 inverse DCT for the "10 non-zero coefficients" case --
// confirm how many vectors are read and written against the body.
static INLINE void idct16x16_10_pass1(const __m128i *const input ,
__m128i *const output ) { … }
// Takes two writable pointers to __m128i vectors (`l` and `io`); returns
// nothing.
// NOTE(review): the body is not visible here. Neither pointee is const, so
// both buffers may be modified. Presumably the second (column) pass of the
// 16x16 "10 coefficient" inverse DCT, with `io` used as both input and
// output -- confirm the role of `l` and both array lengths against the body.
static INLINE void idct16x16_10_pass2(__m128i *const l ,
__m128i *const io ) { … }
// Takes two writable pointers to __m128i vectors (`step1` and `out`); returns
// nothing.
// NOTE(review): the body is not visible here. `step1` is not const, so it
// may be used as scratch and modified. The name suggests stages 4 through 6
// of the second quarter of an 8x32 inverse DCT -- confirm which indices of
// `step1` are read and which indices of `out` are written against the body.
static INLINE void idct32_8x32_quarter_2_stage_4_to_6(
__m128i *const step1 , __m128i *const out ) { … }
// Takes two writable pointers to __m128i vectors (`step1` and `out`); returns
// nothing.
// NOTE(review): the body is not visible here. `step1` is not const, so it
// may be used as scratch and modified. The name suggests stages 4 through 7
// of the third and fourth quarters of an 8x32 inverse DCT -- confirm which
// indices of `step1` are read and which indices of `out` are written
// against the body.
static INLINE void idct32_8x32_quarter_3_4_stage_4_to_7(
__m128i *const step1 , __m128i *const out ) { … }
// Declarations of non-inline transform routines; no definitions for these are
// visible in this header view. All of them return void.
// NOTE(review): the semantics below are inferred from names and parameter
// types only -- confirm against the definitions.

// Single-buffer transforms: each takes one writable vector pointer (`in`),
// presumably transformed in place.
void idct4_sse2(__m128i *const in);
void vpx_idct8_sse2(__m128i *const in);
// Two-buffer variant: `in0` and `in1` are both writable; presumably the two
// halves of a 16x16 block -- TODO confirm.
void idct16_sse2(__m128i *const in0, __m128i *const in1);
void iadst4_sse2(__m128i *const in);
void iadst8_sse2(__m128i *const in);
void vpx_iadst16_8col_sse2(__m128i *const in);
void iadst16_sse2(__m128i *const in0, __m128i *const in1);
// 32-point variants: read-only `in`, writable `out`. The `_1024` / `_34`
// parts of the names presumably refer to the number of non-zero input
// coefficients handled -- TODO confirm.
void idct32_1024_8x32(const __m128i *const in, __m128i *const out);
void idct32_34_8x32_sse2(const __m128i *const in, __m128i *const out);
// NOTE(review): an SSSE3-suffixed routine declared in an SSE2 header;
// presumably defined in an SSSE3-specific translation unit -- confirm callers
// only use it when SSSE3 is available.
void idct32_34_8x32_ssse3(const __m128i *const in, __m128i *const out);
#endif