#include <assert.h>
#include <smmintrin.h>
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/idct.h"
#include "av1/common/x86/av1_inv_txfm_ssse3.h"
#include "av1/common/x86/av1_txfm_sse2.h"
#include "av1/common/x86/av1_txfm_sse4.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
// Takes a vector `u` and a bit depth `bd`; returns a vector by value.
// NOTE(review): name and signature suggest this clamps the 16-bit lanes of `u`
// to the valid sample range for `bd` (presumably [0, (1 << bd) - 1]) — confirm
// against the body.
static inline __m128i highbd_clamp_epi16(__m128i u, int bd) { … }
// Returns void and receives `in` as a non-const pointer.
// NOTE(review): presumably applies a rounding right shift by `shift`, in place,
// to the vectors of a 4x4 block of 32-bit values — confirm the element count
// and the behaviour for shift == 0.
static inline void round_shift_4x4(__m128i *in, int shift) { … }
// Same (in, shift) signature as round_shift_4x4, but not declared inline.
// NOTE(review): presumably the 8x8 counterpart, rounding-shifting the vectors
// of an 8x8 block of 32-bit values in place — confirm how many vectors of `in`
// are touched.
static void round_shift_8x8(__m128i *in, int shift) { … }
// The bounds are passed by const pointer; `in` and `out` are separate,
// non-const vector arrays.
// NOTE(review): presumably clamps each 32-bit lane of `size` entries of `in`
// to [*clamp_lo, *clamp_hi] and stores the result in `out` — confirm whether
// `size` counts vectors or lanes, and whether `in` may alias `out`.
static void highbd_clamp_epi32_sse4_1(__m128i *in, __m128i *out,
const __m128i *clamp_lo,
const __m128i *clamp_hi, int size) { … }
// Takes a prediction vector and two residual vectors; returns one vector.
// NOTE(review): presumably `pred` holds eight 16-bit predicted pixels and
// res0/res1 hold the matching 32-bit residuals (low and high halves); the
// result would be pred + residual, packed to 16 bits and clamped for bit depth
// `bd` — confirm lane layout and clamping against the body.
static inline __m128i highbd_get_recon_8x8_sse4_1(const __m128i pred,
__m128i res0, __m128i res1,
const int bd) { … }
// Single-residual-vector counterpart of highbd_get_recon_8x8_sse4_1.
// NOTE(review): presumably reconstructs four pixels: widens the 16-bit
// prediction in `pred`, adds the 32-bit residuals in `res0`, then packs and
// clamps for bit depth `bd` — confirm.
static inline __m128i highbd_get_recon_4xn_sse4_1(const __m128i pred,
__m128i res0, const int bd) { … }
// NOTE(review): presumably adds `height` rows of 4-wide residuals from `in` to
// the 16-bit pixels at `output` and writes the reconstruction back.
// Presumed parameter roles (verify with callers):
//   stride - row pitch of `output`, likely in uint16_t elements
//   flipud - nonzero to consume the rows of `in` in bottom-to-top order
//   bd     - sample bit depth used for the final clamp
static inline void highbd_write_buffer_4xn_sse4_1(__m128i *in, uint16_t *output,
int stride, int flipud,
int height, const int bd) { … }
// Same parameter list as highbd_write_buffer_4xn_sse4_1.
// NOTE(review): presumably the 8-pixel-wide variant, combining residual
// vectors from `in` with `height` rows of 16-bit pixels at `output` (row pitch
// `stride`), reading `in` bottom-up when `flipud` is nonzero and clamping to
// bit depth `bd` — confirm how `in` is laid out for the two 4-lane halves.
static inline void highbd_write_buffer_8xn_sse4_1(__m128i *in, uint16_t *output,
int stride, int flipud,
int height, const int bd) { … }
// Reads from a const int32_t buffer and fills a vector array.
// NOTE(review): presumably loads `out_size` vectors into `out`, taking one
// vector every `stride` int32_t elements of `in` — confirm the alignment
// requirement on `in` (aligned vs. unaligned load).
static inline void load_buffer_32bit_input(const int32_t *in, int stride,
__m128i *out, int out_size) { … }
// Note that `in` is the destination here: it is the only non-const pointer.
// NOTE(review): presumably loads the 16 contiguous 32-bit coefficients of a
// 4x4 block from `coeff` into four vectors of `in` — confirm.
static inline void load_buffer_4x4(const int32_t *coeff, __m128i *in) { … }
// Externally visible (non-static) entry point.
// NOTE(review): by its name, presumably the 4x4 inverse Walsh-Hadamard
// transform that adds the result to the destination block. `dest8` is typed
// uint8_t* on a high-bitdepth path, so it is presumably a disguised uint16_t
// pointer that the body converts before use — confirm, and confirm the units
// of `stride`.
void av1_highbd_iwht4x4_16_add_sse4_1(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) { … }
// Inputs are passed by value, results are returned through out0/out1.
// NOTE(review): presumably the butterfly primitive: *out0 = clamp(in0 + in1)
// and *out1 = clamp(in0 - in1), with each 32-bit lane clamped to
// [*clamp_lo, *clamp_hi] — confirm the operand order of the subtraction.
static void addsub_sse4_1(const __m128i in0, const __m128i in1, __m128i *out0,
__m128i *out1, const __m128i *clamp_lo,
const __m128i *clamp_hi) { … }
// in0 and in1 are non-const pointers and there are no separate outputs.
// NOTE(review): presumably rounding-shifts *in0 and *in1 right by `shift` and
// clamps them to [*clamp_lo, *clamp_hi], in place — confirm.
static void shift_and_clamp_sse4_1(__m128i *in0, __m128i *in1,
const __m128i *clamp_lo,
const __m128i *clamp_hi, int shift) { … }
// Unlike idct32_stage5..9, this stage takes no clamp_lo/clamp_hi arguments.
// NOTE(review): presumably stage 4 of the 32-point inverse DCT, operating in
// place on the working array `bf1`. The cospi*/cospim* pointers presumably
// reference pre-broadcast (+/-) cosine constants, `rounding` the rounding
// offset and `bit` the shift used after each multiply — confirm.
static inline void idct32_stage4_sse4_1(
__m128i *bf1, const __m128i *cospim8, const __m128i *cospi56,
const __m128i *cospi8, const __m128i *cospim56, const __m128i *cospim40,
const __m128i *cospi24, const __m128i *cospi40, const __m128i *cospim24,
const __m128i *rounding, int bit) { … }
// NOTE(review): presumably stage 5 of the 32-point inverse DCT, updating `bf1`
// in place. The cospi16/cospi48 constants (and their negated `cospim`
// versions) presumably drive the rotations, with `rounding`/`bit` giving the
// post-multiply rounding shift and clamp_lo/clamp_hi bounding the add/sub
// results — confirm.
static inline void idct32_stage5_sse4_1(
__m128i *bf1, const __m128i *cospim16, const __m128i *cospi48,
const __m128i *cospi16, const __m128i *cospim48, const __m128i *clamp_lo,
const __m128i *clamp_hi, const __m128i *rounding, int bit) { … }
// NOTE(review): presumably stage 6 of the 32-point inverse DCT, updating `bf1`
// in place using the cospi32 and cospi16/cospi48 constant families.
// clamp_lo/clamp_hi presumably bound intermediate sums; `rounding` and `bit`
// presumably control the rounding shift after multiplications — confirm.
static inline void idct32_stage6_sse4_1(
__m128i *bf1, const __m128i *cospim32, const __m128i *cospi32,
const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16,
const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi,
const __m128i *rounding, int bit) { … }
// Same parameter list as idct32_stage8_sse4_1.
// NOTE(review): presumably stage 7 of the 32-point inverse DCT, updating `bf1`
// in place with +/-cospi32 rotations and clamped add/sub butterflies —
// confirm.
static inline void idct32_stage7_sse4_1(__m128i *bf1, const __m128i *cospim32,
const __m128i *cospi32,
const __m128i *clamp_lo,
const __m128i *clamp_hi,
const __m128i *rounding, int bit) { … }
// Same parameter list as idct32_stage7_sse4_1.
// NOTE(review): presumably stage 8 of the 32-point inverse DCT, updating `bf1`
// in place; the two stages presumably differ only in which elements are
// paired — confirm.
static inline void idct32_stage8_sse4_1(__m128i *bf1, const __m128i *cospim32,
const __m128i *cospi32,
const __m128i *clamp_lo,
const __m128i *clamp_hi,
const __m128i *rounding, int bit) { … }
// The only idct32 stage helper with a separate `out` array and no cosine
// constants.
// NOTE(review): presumably the final stage of the 32-point inverse DCT,
// forming the outputs in `out` from `bf1` by clamped add/sub. `do_cols`, `bd`
// and `out_shift` presumably select an extra rounding shift and tighter clamp
// for the row pass — confirm.
static inline void idct32_stage9_sse4_1(__m128i *bf1, __m128i *out,
const int do_cols, const int bd,
const int out_shift,
const __m128i *clamp_lo,
const __m128i *clamp_hi) { … }
// Inputs are passed by value, results are returned through out0/out1.
// NOTE(review): presumably rounding-shifts in0 and the negation of in1 right
// by `shift`, clamps both to [*clamp_lo, *clamp_hi] and stores them — the
// sign-alternating output step an ADST needs. Confirm which operand is negated.
static void neg_shift_sse4_1(const __m128i in0, const __m128i in1,
__m128i *out0, __m128i *out1,
const __m128i *clamp_lo, const __m128i *clamp_hi,
int shift) { … }
// NOTE(review): inferred from the name and signature only — presumably a
// 4-point inverse DCT applied to a 4x4 block of 32-bit coefficients.
// Presumed parameter roles (verify with callers):
//   in/out    - input and output vectors (aliasing rules unknown)
//   bit       - precision of the cosine constants / butterfly rounding
//   do_cols   - nonzero for the column pass, zero for the row pass
//   bd        - sample bit depth, used to size intermediate clamping
//   out_shift - extra rounding shift applied to the output
static void idct4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct4x4_sse4_1.
// NOTE(review): by its name, presumably the 4-point inverse ADST over a 4x4
// block of 32-bit coefficients; parameter roles presumably match
// idct4x4_sse4_1 — confirm.
static void iadst4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// NOTE(review): presumably finishes a 4x4 inverse transform: rounding-shifts
// the residuals in `in` by `shift`, adds them to the 16-bit pixels at `output`
// (row pitch `stride`), clamps to bit depth `bd` and stores. `fliplr`/`flipud`
// presumably mirror the residual block horizontally/vertically first —
// confirm.
static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
int fliplr, int flipud, int shift, int bd) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of the other
// 4-point kernels.
// NOTE(review): by its name, presumably the 4-point identity "transform"
// (a fixed scaling of each coefficient). `bit` may be unused here — confirm.
static void iidentity4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Externally visible (non-static) entry point.
// NOTE(review): presumably the 4x4 2-D inverse transform: it would pick row
// and column 1-D kernels from `tx_type`, run both passes over the 32-bit
// coefficients in `input` and add the residual to the 16-bit pixels at
// `output` (row pitch `stride`) for bit depth `bd` — confirm, including how
// unsupported tx_type values are handled.
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) { … }
// As in load_buffer_4x4, `in` is the destination despite its name.
// NOTE(review): presumably loads the 64 contiguous 32-bit coefficients of an
// 8x8 block from `coeff` into 16 vectors of `in` — confirm.
static void load_buffer_8x8(const int32_t *coeff, __m128i *in) { … }
// NOTE(review): inferred from the name and signature only — presumably an
// 8-point inverse DCT over an 8x8 block of 32-bit coefficients.
// Presumed parameter roles (verify with callers):
//   in/out    - input and output vectors
//   bit       - precision of the cosine constants / butterfly rounding
//   do_cols   - nonzero for the column pass, zero for the row pass
//   bd        - sample bit depth, used to size intermediate clamping
//   out_shift - extra rounding shift applied to the output
static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct8x8_sse4_1.
// NOTE(review): by its name, presumably the 8-point inverse ADST over an 8x8
// block of 32-bit coefficients — confirm.
static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of the other
// 1-D kernels in this file.
// NOTE(review): by its name, presumably the 8-point identity transform, i.e. a
// fixed per-coefficient scaling with optional output shift/clamp — confirm
// which parameters are actually used.
static void iidentity8_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Returns one vector; takes a prediction vector and two residual vectors.
// NOTE(review): presumably reconstructs eight pixels by adding the 32-bit
// residual halves res_lo/res_hi to the 16-bit prediction `pred`, reversing the
// residual order when `fliplr` is nonzero, then packing and clamping for bit
// depth `bd` — confirm.
static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi,
int fliplr, int bd) { … }
// Same parameter list as write_buffer_4x4.
// NOTE(review): presumably the 8x8 counterpart: rounding-shifts the residuals
// in `in` by `shift`, applies the optional fliplr/flipud mirroring, adds them
// to the 16-bit pixels at `output` (row pitch `stride`) and stores the result
// clamped to bit depth `bd` — confirm.
static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
int fliplr, int flipud, int shift, int bd) { … }
// Externally visible (non-static) entry point with the same parameter list as
// av1_inv_txfm2d_add_4x4_sse4_1.
// NOTE(review): presumably the 8x8 2-D inverse transform, choosing row/column
// kernels from `tx_type` and adding the residual to the 16-bit pixels at
// `output` — confirm, including the handling of unexpected tx_type values.
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output,
int stride, TX_TYPE tx_type, int bd) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct8x8_sse4_1.
// NOTE(review): the `_low1` suffix presumably marks a reduced 8-point inverse
// DCT for inputs where only the first (DC) coefficient is nonzero — confirm
// what the body assumes about the remaining entries of `in`.
static void idct8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// A second full 8-point inverse-DCT entry alongside idct8x8_sse4_1, with the
// same signature.
// NOTE(review): the `_new` suffix presumably marks the variant used by the
// generic table-driven path, possibly with a different data layout (e.g. one
// vector per coefficient) — confirm how it differs from idct8x8_sse4_1.
static void idct8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// iadst8x8_sse4_1.
// NOTE(review): the `_low1` suffix presumably marks a reduced 8-point inverse
// ADST for inputs with a single nonzero (first) coefficient — confirm.
static void iadst8x8_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// A second full 8-point inverse-ADST entry alongside iadst8x8_sse4_1, with the
// same signature.
// NOTE(review): presumably the counterpart of idct8x8_new_sse4_1 for ADST —
// confirm how its data layout differs from iadst8x8_sse4_1.
static void iadst8x8_new_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct16x16_sse4_1.
// NOTE(review): presumably a reduced 16-point inverse DCT for inputs where
// only the first (DC) coefficient is nonzero — confirm.
static void idct16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct16x16_sse4_1.
// NOTE(review): presumably a reduced 16-point inverse DCT that assumes only
// the first 8 input coefficients can be nonzero — confirm.
static void idct16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// iadst16x16_sse4_1.
// NOTE(review): presumably a reduced 16-point inverse ADST for inputs with a
// single nonzero (first) coefficient — confirm.
static void iadst16x16_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// iadst16x16_sse4_1.
// NOTE(review): presumably a reduced 16-point inverse ADST that assumes only
// the first 8 input coefficients can be nonzero — confirm.
static void iadst16x16_low8_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// NOTE(review): inferred from the name and signature only — presumably the
// full 16-point inverse DCT on 32-bit coefficients.
// Presumed parameter roles (verify with callers):
//   in/out    - input and output vectors
//   bit       - precision of the cosine constants / butterfly rounding
//   do_cols   - nonzero for the column pass, zero for the row pass
//   bd        - sample bit depth, used to size intermediate clamping
//   out_shift - extra rounding shift applied to the output
static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct16x16_sse4_1.
// NOTE(review): by its name, presumably the full 16-point inverse ADST on
// 32-bit coefficients — confirm.
static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of the other
// 1-D kernels in this file.
// NOTE(review): by its name, presumably the 16-point identity transform
// (a fixed per-coefficient scaling) — confirm which parameters are used.
static void iidentity16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Takes a single working array `u`; there is no separate output.
// NOTE(review): presumably stage 8 of the 64-point inverse DCT, updating `u`
// in place with cospi32 and cospi16/cospi48 rotations plus clamped add/sub
// butterflies. `rnding`/`bit` presumably give the post-multiply rounding
// shift — confirm.
static inline void idct64_stage8_sse4_1(
__m128i *u, const __m128i *cospim32, const __m128i *cospi32,
const __m128i *cospim16, const __m128i *cospi48, const __m128i *cospi16,
const __m128i *cospim48, const __m128i *clamp_lo, const __m128i *clamp_hi,
const __m128i *rnding, int bit) { … }
// Same parameter list as idct64_stage10_sse4_1.
// NOTE(review): presumably stage 9 of the 64-point inverse DCT, updating `u`
// in place with +/-cospi32 rotations and clamped add/sub butterflies —
// confirm.
static inline void idct64_stage9_sse4_1(__m128i *u, const __m128i *cospim32,
const __m128i *cospi32,
const __m128i *clamp_lo,
const __m128i *clamp_hi,
const __m128i *rnding, int bit) { … }
// Same parameter list as idct64_stage9_sse4_1.
// NOTE(review): presumably stage 10 of the 64-point inverse DCT, updating `u`
// in place; the two stages presumably differ only in which elements are
// paired — confirm.
static inline void idct64_stage10_sse4_1(__m128i *u, const __m128i *cospim32,
const __m128i *cospi32,
const __m128i *clamp_lo,
const __m128i *clamp_hi,
const __m128i *rnding, int bit) { … }
// The only idct64 stage helper with a separate `out` array and no cosine
// constants.
// NOTE(review): presumably the final stage of the 64-point inverse DCT,
// forming `out` from `u` by clamped add/sub. `do_cols`, `bd` and `out_shift`
// presumably select an extra rounding shift and tighter clamp for the row
// pass — confirm.
static inline void idct64_stage11_sse4_1(__m128i *u, __m128i *out, int do_cols,
int bd, int out_shift,
const __m128i *clamp_lo,
const __m128i *clamp_hi) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct64x64_sse4_1.
// NOTE(review): presumably a reduced 64-point inverse DCT for inputs where
// only the first (DC) coefficient is nonzero — confirm.
static void idct64x64_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct64x64_sse4_1.
// NOTE(review): presumably a reduced 64-point inverse DCT that assumes only
// the first 8 input coefficients can be nonzero — confirm.
static void idct64x64_low8_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct64x64_sse4_1.
// NOTE(review): presumably a reduced 64-point inverse DCT that assumes only
// the first 16 input coefficients can be nonzero — confirm.
static void idct64x64_low16_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// NOTE(review): inferred from the name and signature only — presumably the
// general 64-point inverse DCT on 32-bit coefficients.
// Presumed parameter roles (verify with callers):
//   in/out    - input and output vectors
//   bit       - precision of the cosine constants / butterfly rounding
//   do_cols   - nonzero for the column pass, zero for the row pass
//   bd        - sample bit depth, used to size intermediate clamping
//   out_shift - extra rounding shift applied to the output
// Also confirm how many leading inputs the body actually reads.
static void idct64x64_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct32x32_sse4_1.
// NOTE(review): presumably a reduced 32-point inverse DCT for inputs where
// only the first (DC) coefficient is nonzero — confirm.
static void idct32x32_low1_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct32x32_sse4_1.
// NOTE(review): presumably a reduced 32-point inverse DCT that assumes only
// the first 8 input coefficients can be nonzero; it may reuse the
// idct32_stage4..9 helpers declared earlier in this file — confirm.
static void idct32x32_low8_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of
// idct32x32_sse4_1.
// NOTE(review): presumably a reduced 32-point inverse DCT that assumes only
// the first 16 input coefficients can be nonzero — confirm.
static void idct32x32_low16_sse4_1(__m128i *in, __m128i *out, int bit,
int do_cols, int bd, int out_shift) { … }
// NOTE(review): inferred from the name and signature only — presumably the
// full 32-point inverse DCT on 32-bit coefficients.
// Presumed parameter roles (verify with callers):
//   in/out    - input and output vectors
//   bit       - precision of the cosine constants / butterfly rounding
//   do_cols   - nonzero for the column pass, zero for the row pass
//   bd        - sample bit depth, used to size intermediate clamping
//   out_shift - extra rounding shift applied to the output
static void idct32x32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// File-local (static) despite the av1_ prefix.
// NOTE(review): presumably the 8x8 handler behind
// av1_highbd_inv_txfm_add_sse4_1: it would read the transform type, bit depth
// and similar fields from `txfm_param` and forward to an 8x8 2-D inverse
// transform. `dest` is uint8_t* on a high-bitdepth path, so it is presumably
// converted to a uint16_t pointer inside — confirm.
static void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// File-local (static) despite the av1_ prefix; same parameter list as
// av1_highbd_inv_txfm_add_8x8_sse4_1.
// NOTE(review): presumably the 4x4 handler; it may route lossless blocks to
// av1_highbd_iwht4x4_16_add_sse4_1 and everything else to
// av1_inv_txfm2d_add_4x4_sse4_1 — confirm which TxfmParam fields drive that.
static void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// Shares the (in, out, bit, do_cols, bd, out_shift) signature of the other
// 1-D kernels in this file.
// NOTE(review): by its name, presumably the 32-point identity transform
// (a fixed per-coefficient scaling) — confirm which parameters are used.
static void iidentity32_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Constant 3-D table of transform_1d_sse4_1 entries with dimensions
// [TX_SIZES][ITX_TYPES_1D][4].
// NOTE(review): presumably a lookup of 1-D kernel function pointers by
// transform size and 1-D transform type, with the last index selecting a
// variant by how many leading coefficients may be nonzero (e.g. the
// _low1/_low8/_low16/full kernels in this file) — confirm the index mapping
// and whether unsupported combinations are NULL.
static const transform_1d_sse4_1
highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = …;
// NOTE(review): presumably the 2-D inverse transform path for types whose
// horizontal (row) transform is the identity; the result would be added to
// the 16-bit pixels at `output` (row pitch `stride`).
// Presumed parameter roles (verify with callers):
//   tx_type/tx_size - 2-D transform type and block size
//   eob             - end-of-block position, used to skip all-zero regions
//   bd              - sample bit depth
// NOTE(review): the `_ssse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file; renaming needs every call site updated.
static void highbd_inv_txfm2d_add_h_identity_ssse41(const int32_t *input,
uint16_t *output,
int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob,
const int bd) { … }
// Same parameter list as highbd_inv_txfm2d_add_h_identity_ssse41.
// NOTE(review): presumably the 2-D inverse transform path for types whose
// vertical (column) transform is the identity; `eob` presumably lets it skip
// all-zero coefficient regions — confirm.
// NOTE(review): the `_ssse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file; renaming needs every call site updated.
static void highbd_inv_txfm2d_add_v_identity_ssse41(const int32_t *input,
uint16_t *output,
int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob,
const int bd) { … }
// Same parameter list as the h_identity/v_identity variants.
// NOTE(review): presumably the 2-D inverse transform path for IDTX, where both
// the row and column transforms are the identity — confirm whether `eob` is
// used at all here.
// NOTE(review): the `_ssse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file; renaming needs every call site updated.
static void highbd_inv_txfm2d_add_idtx_ssse41(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// NOTE(review): presumably the general 2-D inverse transform path for types
// where neither 1-D transform is the identity (DCT/ADST/flipped-ADST
// combinations). It likely picks row/column kernels via
// highbd_txfm_all_1d_zeros_w8_arr using `tx_size`, `tx_type` and `eob`, then
// adds the residual to `output` — confirm.
// NOTE(review): the `_sse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file.
static void highbd_inv_txfm2d_add_no_identity_sse41(const int32_t *input,
uint16_t *output,
int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob,
const int bd) { … }
// NOTE(review): presumably the dedicated 2-D inverse transform for 4-wide by
// 8-tall blocks, adding the residual to the 16-bit pixels at `output`.
// Rectangular 2:1 blocks may need an extra 1/sqrt(2) scaling — confirm where
// that is applied and whether `tx_size`/`eob` are actually used.
// NOTE(review): the `_sse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file.
static void highbd_inv_txfm2d_add_4x8_sse41(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// Same parameter list as highbd_inv_txfm2d_add_4x8_sse41.
// NOTE(review): presumably the dedicated 2-D inverse transform for 8-wide by
// 4-tall blocks — confirm the rectangular scaling and the use of
// `tx_size`/`eob`.
// NOTE(review): the `_sse41` suffix differs from the `_sse4_1` spelling used
// by most functions in this file.
static void highbd_inv_txfm2d_add_8x4_sse41(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// Same parameter list as the 4x8/8x4 variants.
// NOTE(review): presumably the dedicated 2-D inverse transform for 4-wide by
// 16-tall blocks, adding the residual to the 16-bit pixels at `output` (row
// pitch `stride`) for bit depth `bd` — confirm the use of `tx_size`/`eob`.
static void highbd_inv_txfm2d_add_4x16_sse4_1(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// Same parameter list as highbd_inv_txfm2d_add_4x16_sse4_1.
// NOTE(review): presumably the dedicated 2-D inverse transform for 16-wide by
// 4-tall blocks — confirm the use of `tx_size`/`eob`.
static void highbd_inv_txfm2d_add_16x4_sse4_1(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// Externally visible (non-static) entry point. Unlike the static
// highbd_inv_txfm2d_add_* helpers, `output` is typed uint8_t* here.
// NOTE(review): presumably dispatches on `tx_type` to the no_identity,
// h_identity, v_identity or idtx helper after converting `output` to a
// uint16_t pointer — confirm the conversion and the handling of unexpected
// tx_type values.
void av1_highbd_inv_txfm2d_add_universe_sse4_1(const int32_t *input,
uint8_t *output, int stride,
TX_TYPE tx_type, TX_SIZE tx_size,
int eob, const int bd) { … }
// File-local (static) despite the av1_ prefix.
// NOTE(review): presumably a thin adapter that unpacks `txfm_param` (transform
// type, size, eob, bit depth) and calls highbd_inv_txfm2d_add_4x8_sse41 with
// `dest` converted to a uint16_t pointer — confirm.
static void av1_highbd_inv_txfm_add_4x8_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// File-local (static) despite the av1_ prefix.
// NOTE(review): presumably a thin adapter that unpacks `txfm_param` and calls
// highbd_inv_txfm2d_add_8x4_sse41 with `dest` converted to a uint16_t
// pointer — confirm.
static void av1_highbd_inv_txfm_add_8x4_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// File-local (static) despite the av1_ prefix.
// NOTE(review): presumably a thin adapter that unpacks `txfm_param` and calls
// highbd_inv_txfm2d_add_4x16_sse4_1 with `dest` converted to a uint16_t
// pointer — confirm.
static void av1_highbd_inv_txfm_add_4x16_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// File-local (static) despite the av1_ prefix.
// NOTE(review): presumably a thin adapter that unpacks `txfm_param` and calls
// highbd_inv_txfm2d_add_16x4_sse4_1 with `dest` converted to a uint16_t
// pointer — confirm.
static void av1_highbd_inv_txfm_add_16x4_sse4_1(const tran_low_t *input,
uint8_t *dest, int stride,
const TxfmParam *txfm_param) { … }
// Externally visible (non-static) entry point with the same parameter list as
// the static per-size av1_highbd_inv_txfm_add_*_sse4_1 handlers.
// NOTE(review): presumably the top-level dispatcher: it would switch on the
// transform size in `txfm_param`, route 4x4, 8x8, 4x8, 8x4, 4x16 and 16x4 to
// those handlers and send the remaining sizes to
// av1_highbd_inv_txfm2d_add_universe_sse4_1 — confirm the default case.
void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) { … }