#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
#include <emmintrin.h>
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/x86/transpose_sse2.h"
#include "aom_dsp/x86/txfm_common_sse2.h"
#include "av1/common/av1_txfm.h"
#ifdef __cplusplus
extern "C" {
#endif
// Takes two weight vectors (w0, w1), a rounding vector, a cos_bit value and
// two input vectors, and returns its two results through out0 and out1.
// All pointer arguments except out0/out1 are const, so only the outputs can
// be written.
// NOTE(review): the body is elided in this view. By name this is presumably
// the 4-lane 16-bit butterfly step that the btf_16_* macros build on; confirm
// the exact arithmetic (multiply/add, rounding, shift by cos_bit) against the
// full definition.
// NOTE(review): `__rounding` contains a double underscore, which C reserves
// for the implementation. Any rename must also cover the elided body.
static inline void btf_16_w4_sse2(
const __m128i *const w0, const __m128i *const w1, const __m128i __rounding,
const int8_t cos_bit, const __m128i *const in0, const __m128i *const in1,
__m128i *const out0, __m128i *const out1) { … }
// Function-like macro over (w0, w1, in0, in1, out0, out1).
// NOTE(review): the expansion is elided in this view. The argument list has
// no rounding or cos_bit parameter, so the expansion presumably picks those
// up from the caller's scope; verify which identifiers callers must define
// and whether any argument is evaluated more than once.
#define btf_16_4p_sse2(w0, w1, in0, in1, out0, out1) …
// Function-like macro over (w0, w1, in0, in1, out0, out1), same argument
// list as btf_16_4p_sse2.
// NOTE(review): the expansion is elided in this view. By name this is
// presumably the full-width (8-lane) variant of the 16-bit butterfly; confirm,
// and check which caller-scope identifiers the expansion relies on.
#define btf_16_sse2(w0, w1, in0, in1, out0, out1) …
// Returns an __m128i built from the int16_t data at `a`; `a` is read-only.
// NOTE(review): body elided. Presumably loads eight 16-bit values; confirm
// the element count and whether `a` must be 16-byte aligned.
static inline __m128i load_16bit_to_16bit(const int16_t *a) { … }
// Returns an __m128i built from the int32_t data at `a`; `a` is read-only.
// NOTE(review): body elided. By name this narrows 32-bit inputs to 16-bit
// lanes; confirm how many inputs are read and whether narrowing saturates.
static inline __m128i load_32bit_to_16bit(const int32_t *a) { … }
// Returns an __m128i built from the int32_t data at `a`; `a` is read-only.
// NOTE(review): body elided. The `_w4` suffix presumably means only four
// 32-bit inputs are read; confirm, and confirm the contents of the unused
// lanes of the result.
static inline __m128i load_32bit_to_16bit_w4(const int32_t *a) { … }
// Writes data derived from vector `a` to the int32_t buffer at `b`.
// NOTE(review): body elided. By name this widens 16-bit lanes to 32-bit and
// stores four of them; confirm sign extension, element count and the
// alignment required of `b`.
static inline void store_16bit_to_32bit_w4(const __m128i a, int32_t *const b) { … }
// Writes data derived from vector `a` to the int32_t buffer at `b`.
// NOTE(review): body elided. Presumably widens all eight 16-bit lanes to
// 32-bit and stores them; confirm sign extension and the alignment required
// of `b`.
static inline void store_16bit_to_32bit(__m128i a, int32_t *b) { … }
// Returns an __m128i computed from vector `a` and the integer `scale`.
// NOTE(review): body elided. By name this multiplies by `scale` and rounds;
// the lane width, rounding constant and shift amount are not visible here.
static inline __m128i scale_round_sse2(const __m128i a, const int scale) { … }
// Writes data derived from vector `a` to the int32_t buffer at `b`.
// NOTE(review): body elided. The `rect` prefix presumably means an extra
// rectangular-block scaling is applied before the 4-wide widening store;
// confirm the scale factor against the full definition.
static inline void store_rect_16bit_to_32bit_w4(const __m128i a,
int32_t *const b) { … }
// Writes data derived from vector `a` to the int32_t buffer at `b`.
// NOTE(review): body elided. Presumably the full-width counterpart of
// store_rect_16bit_to_32bit_w4 (scale, then widen and store); confirm the
// element count and scale factor.
static inline void store_rect_16bit_to_32bit(const __m128i a,
int32_t *const b) { … }
// Fills the vector array `out` from the int16_t buffer `in`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably loads `out_size` rows of four 16-bit
// values, with rows `stride` elements apart; confirm the stride units and the
// contents of the upper lanes of each output vector.
static inline void load_buffer_16bit_to_16bit_w4(const int16_t *const in,
const int stride,
__m128i *const out,
const int out_size) { … }
// Same parameter list as load_buffer_16bit_to_16bit_w4: fills `out` from the
// read-only int16_t buffer `in`, using `stride` and `out_size`.
// NOTE(review): body elided. The `_flip` suffix presumably means rows are
// stored into `out` in reverse order; confirm which axis is flipped.
static inline void load_buffer_16bit_to_16bit_w4_flip(const int16_t *const in,
const int stride,
__m128i *const out,
const int out_size) { … }
// Fills the vector array `out` from the int16_t buffer `in`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably loads `out_size` rows of eight 16-bit
// values, with rows `stride` elements apart; confirm the stride units and
// alignment requirements.
static inline void load_buffer_16bit_to_16bit(const int16_t *in, int stride,
__m128i *out, int out_size) { … }
// Same parameter list as load_buffer_16bit_to_16bit: fills `out` from the
// read-only int16_t buffer `in`, using `stride` and `out_size`.
// NOTE(review): body elided. The `_flip` suffix presumably means rows are
// stored into `out` in reverse order; confirm which axis is flipped.
static inline void load_buffer_16bit_to_16bit_flip(const int16_t *in,
int stride, __m128i *out,
int out_size) { … }
// Fills the vector array `out` from the int32_t buffer `in`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably loads `out_size` rows, narrowing
// 32-bit inputs to 16-bit lanes; confirm the row width, stride units and
// saturation behavior.
static inline void load_buffer_32bit_to_16bit(const int32_t *in, int stride,
__m128i *out, int out_size) { … }
// Fills the vector array `out` from the int32_t buffer `in`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably the 4-wide counterpart of
// load_buffer_32bit_to_16bit; confirm the row width and the contents of the
// unused lanes.
static inline void load_buffer_32bit_to_16bit_w4(const int32_t *in, int stride,
__m128i *out, int out_size) { … }
// Same parameter list as load_buffer_32bit_to_16bit: fills `out` from the
// read-only int32_t buffer `in`, using `stride` and `out_size`.
// NOTE(review): body elided. The `_flip` suffix presumably means rows are
// stored into `out` in reverse order; confirm which axis is flipped.
static inline void load_buffer_32bit_to_16bit_flip(const int32_t *in,
int stride, __m128i *out,
int out_size) { … }
// Writes the vector array `in` to the int32_t buffer `out`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably stores `out_size` rows of four
// widened 16-bit values, with rows `stride` elements apart; confirm the
// stride units and the alignment required of `out`.
static inline void store_buffer_16bit_to_32bit_w4(const __m128i *const in,
int32_t *const out,
const int stride,
const int out_size) { … }
// Writes the vector array `in` to the int32_t buffer `out`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably stores `out_size` rows of eight
// widened 16-bit values, with rows `stride` elements apart; confirm the
// stride units and the alignment required of `out`.
static inline void store_buffer_16bit_to_32bit_w8(const __m128i *const in,
int32_t *const out,
const int stride,
const int out_size) { … }
// Writes the vector array `in` to the int32_t buffer `out`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably the rectangular-scaling variant of
// store_buffer_16bit_to_32bit_w4; confirm the scale factor applied per row.
static inline void store_rect_buffer_16bit_to_32bit_w4(const __m128i *const in,
int32_t *const out,
const int stride,
const int out_size) { … }
// Writes the vector array `in` to the int32_t buffer `out`, using `stride`
// and `out_size`; `in` is read-only.
// NOTE(review): body elided. Presumably the rectangular-scaling variant of
// store_buffer_16bit_to_32bit_w8; confirm the scale factor applied per row.
static inline void store_rect_buffer_16bit_to_32bit_w8(const __m128i *const in,
int32_t *const out,
const int stride,
const int out_size) { … }
// Writes the vector array `in` to the uint16_t buffer `out` using `stride`;
// `in` is read-only. There is no size parameter.
// NOTE(review): body elided. By name this stores a fixed 8x8 block (eight
// vectors of eight 16-bit values); confirm, and confirm the alignment
// required of `out`.
static inline void store_buffer_16bit_to_16bit_8x8(const __m128i *in,
uint16_t *out,
const int stride) { … }
// Takes a non-const vector array `in` plus `size` and `bit`. The function
// returns void and `in` is the only pointer, so any result is written back
// through `in`.
// NOTE(review): body elided. Presumably applies a rounding shift by `bit` to
// each of `size` vectors of 16-bit lanes; the sign convention of `bit`
// (left vs. right shift) is not visible here.
static inline void round_shift_16bit(__m128i *in, int size, int bit) { … }
// Takes vector arrays `in` and `out` and an element count `size`.
// NOTE(review): body elided. Presumably copies `in` to `out` in reversed
// order. `in` is not const-qualified even though the name suggests it is only
// read; confirm, and confirm whether `in` and `out` may alias.
static inline void flip_buf_sse2(__m128i *in, __m128i *out, int size) { … }
// Prototypes for the av1_lowbd_fwd_txfm2d_<W>x<H>_sse2 family. The definitions
// are not in this header. All share one signature: a read-only int16_t
// `input`, an int32_t `output`, an `input` stride, a TX_TYPE selector and
// `bd`.
// Sizes declared here: 4x4, 4x8, 4x16, 8x4, 8x8, 8x16, 8x32, 16x4, 16x8,
// 16x16, 16x32, 32x8, 32x16, 32x32, 16x64, 64x16.
// NOTE(review): by name these are the low-bit-depth forward 2D transforms for
// each block size; the units of `stride`, the layout of `output` and whether
// `bd` is actually used cannot be determined from the declarations alone.
void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
int stride, TX_TYPE tx_type, int bd);
// NOTE(review): this line has no type specifier, so as written it is not a
// valid C99/C11 declaration. It looks like the tail of a truncated typedef
// (presumably a function-pointer type for a 1D transform); restore the full
// typedef from the original header rather than guessing its signature.
transform_1d_sse2;
// Prototypes for two routines defined outside this header. Each takes a
// read-only vector array `input` and writes a vector array `output`.
// NOTE(review): by name these are the 8-point inverse ADST and inverse DCT;
// the required length of `input`/`output` and whether they may alias are not
// visible from the declarations.
void av1_iadst8_sse2(const __m128i *input, __m128i *output);
void av1_idct8_sse2(const __m128i *input, __m128i *output);
// NOTE(review): this line has no type specifier, so as written it is not a
// valid C99/C11 declaration. It looks like the tail of a truncated typedef
// (presumably a struct type describing a 2D transform); restore the full
// typedef from the original header rather than guessing its members.
transform_2d_sse2;
#ifdef __cplusplus
}
#endif
#endif