#include <assert.h>
#include <immintrin.h>
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/idct.h"
#include "av1/common/x86/av1_inv_txfm_ssse3.h"
#include "av1/common/x86/highbd_txfm_utility_sse4.h"
#include "aom_dsp/x86/txfm_common_avx2.h"
// Takes a 256-bit integer vector `u` and a bit depth `bd`, returns a vector.
// NOTE(review): body is elided in this view; from the name this presumably
// clamps 16-bit lanes to the valid pixel range for `bd` -- confirm.
static inline __m256i highbd_clamp_epi16_avx2(__m256i u, int bd) { … }
// Operates in place on the vector array `in` using `shift`.
// NOTE(review): body not visible here; presumably applies a rounding right
// shift to a 4x4 group of values -- confirm element count and shift direction.
static inline void round_shift_4x4_avx2(__m256i *in, int shift) { … }
// Operates in place on the vector array `in` using `shift`.
// NOTE(review): body not visible here; presumably the 8x8 counterpart of
// round_shift_4x4_avx2 -- confirm how many vectors of `in` are touched.
static inline void round_shift_8x8_avx2(__m256i *in, int shift) { … }
// Takes an input vector array `in`, an output vector array `out`, lower and
// upper bound vectors (`clamp_lo`, `clamp_hi`, both read-only) and a `size`.
// NOTE(review): body is elided; presumably clamps 32-bit lanes of `size`
// vectors into [clamp_lo, clamp_hi] -- confirm the unit of `size`.
static void highbd_clamp_epi32_avx2(__m256i *in, __m256i *out,
const __m256i *clamp_lo,
const __m256i *clamp_hi, int size) { … }
// Combines a prediction vector `pred` with two residual vectors (`res0`,
// `res1`) for bit depth `bd` and returns one vector.
// NOTE(review): body is elided; presumably adds residuals to the widened
// prediction, packs to 16 bits and clamps to `bd` -- confirm lane ordering.
static inline __m256i highbd_get_recon_16x8_avx2(const __m256i pred,
__m256i res0, __m256i res1,
const int bd) { … }
// Takes residual vectors `in`, a 16-bit pixel buffer `output` with row pitch
// `stride`, a `flipud` flag, a row count `height`, and bit depth `bd`.
// NOTE(review): body is elided; presumably reconstructs and stores 16-wide
// rows, reversing row order when `flipud` is set -- confirm.
static inline void highbd_write_buffer_16xn_avx2(__m256i *in, uint16_t *output,
int stride, int flipud,
int height, const int bd) { … }
// Combines a prediction vector `pred` with a single residual vector `res`
// for bit depth `bd` and returns one vector.
// NOTE(review): body is elided; presumably the 8-wide counterpart of
// highbd_get_recon_16x8_avx2 -- confirm the output lane layout.
static inline __m256i highbd_get_recon_8x8_avx2(const __m256i pred, __m256i res,
const int bd) { … }
// Takes residual vectors `in`, a 16-bit pixel buffer `output` with row pitch
// `stride`, a `flipud` flag, a row count `height`, and bit depth `bd`.
// NOTE(review): body is elided; presumably reconstructs and stores 8-wide
// rows, reversing row order when `flipud` is set -- confirm.
static inline void highbd_write_buffer_8xn_avx2(__m256i *in, uint16_t *output,
int stride, int flipud,
int height, const int bd) { … }
// Takes two input vectors by value and writes two results through `out0`
// and `out1`; also receives clamp bounds and a `shift` amount.
// NOTE(review): body is elided; from the name, presumably negates one of the
// inputs and applies a rounding shift plus clamp -- confirm which output is
// negated.
static void neg_shift_avx2(const __m256i in0, const __m256i in1, __m256i *out0,
__m256i *out1, const __m256i *clamp_lo,
const __m256i *clamp_hi, int shift) { … }
// Reads vectors from `in` (read-only) and writes vectors to `out`.
// NOTE(review): body is elided; presumably transposes an 8x8 block of 32-bit
// values held in eight vectors -- confirm whether `in` and `out` may alias.
static void transpose_8x8_avx2(const __m256i *in, __m256i *out) { … }
// Reads vectors from `in` (read-only) and writes vectors to `out`.
// NOTE(review): body is elided; presumably an 8x8 transpose combined with a
// flip -- confirm which axis is reversed.
static void transpose_8x8_flip_avx2(const __m256i *in, __m256i *out) { … }
// Takes a read-only int32_t buffer `in` with pitch `stride` and fills the
// vector array `out`; `out_size` bounds the output.
// NOTE(review): body is elided; presumably loads `out_size` vectors, one per
// `stride` elements -- confirm alignment requirements on `in`.
static inline void load_buffer_32bit_input(const int32_t *in, int stride,
__m256i *out, int out_size) { … }
// Takes one weight vector `w0`, one data vector `n0`, a `rounding` vector
// and a shift `bit`; returns a vector.
// NOTE(review): body is elided; presumably computes (w0 * n0 + rounding) >>
// bit per 32-bit lane, i.e. a half butterfly with one zero input -- confirm.
static inline __m256i half_btf_0_avx2(const __m256i *w0, const __m256i *n0,
const __m256i *rounding, int bit) { … }
// Takes two weight/data vector pairs (`w0`,`n0`) and (`w1`,`n1`), a
// `rounding` vector and a shift `bit`; returns a vector.
// NOTE(review): body is elided; presumably computes
// (w0 * n0 + w1 * n1 + rounding) >> bit per 32-bit lane -- confirm.
static inline __m256i half_btf_avx2(const __m256i *w0, const __m256i *n0,
const __m256i *w1, const __m256i *n1,
const __m256i *rounding, int bit) { … }
// Takes two input vectors by value and writes two results through `out0`
// and `out1`, with clamp bounds supplied by the caller.
// NOTE(review): body is elided; presumably out0 = clamp(in0 + in1) and
// out1 = clamp(in0 - in1) -- confirm operand order of the subtraction.
static void addsub_avx2(const __m256i in0, const __m256i in1, __m256i *out0,
__m256i *out1, const __m256i *clamp_lo,
const __m256i *clamp_hi) { … }
// Operates on the working array `bf1` using eight caller-supplied cosine
// constant vectors, a `rounding` vector and a shift `bit`. Unlike the later
// stage helpers, this one takes no clamp bounds.
// NOTE(review): body is elided; presumably stage 4 of the 32-point inverse
// DCT butterfly network, updating `bf1` in place -- confirm.
static inline void idct32_stage4_avx2(
__m256i *bf1, const __m256i *cospim8, const __m256i *cospi56,
const __m256i *cospi8, const __m256i *cospim56, const __m256i *cospim40,
const __m256i *cospi24, const __m256i *cospi40, const __m256i *cospim24,
const __m256i *rounding, int bit) { … }
// Operates on the working array `bf1` using four cosine constant vectors,
// clamp bounds, a `rounding` vector and a shift `bit`.
// NOTE(review): body is elided; presumably stage 5 of the 32-point inverse
// DCT, updating `bf1` in place -- confirm.
static inline void idct32_stage5_avx2(
__m256i *bf1, const __m256i *cospim16, const __m256i *cospi48,
const __m256i *cospi16, const __m256i *cospim48, const __m256i *clamp_lo,
const __m256i *clamp_hi, const __m256i *rounding, int bit) { … }
// Operates on the working array `bf1` using six cosine constant vectors,
// clamp bounds, a `rounding` vector and a shift `bit`.
// NOTE(review): body is elided; presumably stage 6 of the 32-point inverse
// DCT, updating `bf1` in place -- confirm.
static inline void idct32_stage6_avx2(
__m256i *bf1, const __m256i *cospim32, const __m256i *cospi32,
const __m256i *cospim16, const __m256i *cospi48, const __m256i *cospi16,
const __m256i *cospim48, const __m256i *clamp_lo, const __m256i *clamp_hi,
const __m256i *rounding, int bit) { … }
// Operates on the working array `bf1` using the `cospim32`/`cospi32`
// constant vectors, clamp bounds, a `rounding` vector and a shift `bit`.
// NOTE(review): body is elided; presumably stage 7 of the 32-point inverse
// DCT, updating `bf1` in place -- confirm.
static inline void idct32_stage7_avx2(__m256i *bf1, const __m256i *cospim32,
const __m256i *cospi32,
const __m256i *clamp_lo,
const __m256i *clamp_hi,
const __m256i *rounding, int bit) { … }
// Same parameter list as idct32_stage7_avx2: working array `bf1`, the
// `cospim32`/`cospi32` constants, clamp bounds, `rounding` and `bit`.
// NOTE(review): body is elided; presumably stage 8 of the 32-point inverse
// DCT, updating `bf1` in place -- confirm.
static inline void idct32_stage8_avx2(__m256i *bf1, const __m256i *cospim32,
const __m256i *cospi32,
const __m256i *clamp_lo,
const __m256i *clamp_hi,
const __m256i *rounding, int bit) { … }
// Takes the working array `bf1` and a separate destination array `out`,
// plus `do_cols`, `bd`, `out_shift` and clamp bounds. It takes no cosine
// constants or rounding vector.
// NOTE(review): body is elided; presumably the final add/sub stage of the
// 32-point inverse DCT, with an output shift/clamp applied depending on
// `do_cols` -- confirm under which condition the shift is applied.
static inline void idct32_stage9_avx2(__m256i *bf1, __m256i *out,
const int do_cols, const int bd,
const int out_shift,
const __m256i *clamp_lo,
const __m256i *clamp_hi) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; "low1" presumably means a 32-point inverse
// DCT specialised for inputs where only the first coefficient is non-zero --
// confirm.
static void idct32_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; "low8" presumably means a 32-point inverse
// DCT specialised for inputs where only the first 8 coefficients are
// non-zero -- confirm.
static void idct32_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; "low16" presumably means a 32-point inverse
// DCT specialised for inputs where only the first 16 coefficients are
// non-zero -- confirm.
static void idct32_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the full 32-point inverse DCT
// with no assumption about zero coefficients -- confirm.
static void idct32_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 16-point inverse DCT for the
// case where only the first coefficient is non-zero -- confirm.
static void idct16_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 16-point inverse DCT for the
// case where only the first 8 coefficients are non-zero -- confirm.
static void idct16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the full 16-point inverse DCT --
// confirm.
static void idct16_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 16-point inverse ADST for the
// case where only the first coefficient is non-zero -- confirm.
static void iadst16_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 16-point inverse ADST for the
// case where only the first 8 coefficients are non-zero -- confirm.
static void iadst16_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the full 16-point inverse ADST --
// confirm.
static void iadst16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably an 8-point inverse DCT for the
// case where only the first coefficient is non-zero -- confirm.
static void idct8x8_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the full 8-point inverse DCT
// applied across eight vectors -- confirm.
static void idct8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably an 8-point inverse ADST for the
// case where only the first coefficient is non-zero -- confirm.
static void iadst8x8_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the full 8-point inverse ADST --
// confirm.
static void iadst8x8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// Operates on the working array `u` using six cosine constant vectors,
// clamp bounds, a rounding vector `rnding` and a shift `bit`.
// NOTE(review): body is elided; presumably stage 8 of the 64-point inverse
// DCT, updating `u` in place -- confirm.
static inline void idct64_stage8_avx2(
__m256i *u, const __m256i *cospim32, const __m256i *cospi32,
const __m256i *cospim16, const __m256i *cospi48, const __m256i *cospi16,
const __m256i *cospim48, const __m256i *clamp_lo, const __m256i *clamp_hi,
const __m256i *rnding, int bit) { … }
// Operates on the working array `u` using the `cospim32`/`cospi32`
// constants, clamp bounds, a rounding vector `rnding` and a shift `bit`.
// NOTE(review): body is elided; presumably stage 9 of the 64-point inverse
// DCT, updating `u` in place -- confirm.
static inline void idct64_stage9_avx2(__m256i *u, const __m256i *cospim32,
const __m256i *cospi32,
const __m256i *clamp_lo,
const __m256i *clamp_hi,
const __m256i *rnding, int bit) { … }
// Same parameter list as idct64_stage9_avx2: working array `u`, the
// `cospim32`/`cospi32` constants, clamp bounds, `rnding` and `bit`.
// NOTE(review): body is elided; presumably stage 10 of the 64-point inverse
// DCT, updating `u` in place -- confirm.
static inline void idct64_stage10_avx2(__m256i *u, const __m256i *cospim32,
const __m256i *cospi32,
const __m256i *clamp_lo,
const __m256i *clamp_hi,
const __m256i *rnding, int bit) { … }
// Takes the working array `u` and a separate destination array `out`, plus
// `do_cols`, `bd`, `out_shift` and clamp bounds. It takes no cosine
// constants or rounding vector.
// NOTE(review): body is elided; presumably the final add/sub stage of the
// 64-point inverse DCT, with an output shift/clamp applied depending on
// `do_cols` -- confirm under which condition the shift is applied.
static inline void idct64_stage11_avx2(__m256i *u, __m256i *out, int do_cols,
int bd, int out_shift,
const __m256i *clamp_lo,
const __m256i *clamp_hi) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 64-point inverse DCT for the
// case where only the first coefficient is non-zero -- confirm.
static void idct64_low1_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 64-point inverse DCT for the
// case where only the first 8 coefficients are non-zero -- confirm.
static void idct64_low8_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably a 64-point inverse DCT for the
// case where only the first 16 coefficients are non-zero -- confirm.
static void idct64_low16_avx2(__m256i *in, __m256i *out, int bit, int do_cols,
int bd, int out_shift) { … }
// 1-D transform kernel with the (in, out, bit, do_cols, bd, out_shift)
// parameter list shared by the other kernels in this file.
// NOTE(review): body is elided; presumably the general 64-point inverse DCT
// -- confirm how many input coefficients it actually reads.
static void idct64_avx2(__m256i *in, __m256i *out, int bit, int do_cols, int bd,
int out_shift) { … }
// NOTE(review): only the tail of this declaration is visible in this view.
// `transform_1d_avx2` is used below as the element type of the kernel
// dispatch table, so it is presumably a function-pointer typedef matching
// the (in, out, bit, do_cols, bd, out_shift) kernel signature -- confirm.
transform_1d_avx2;
// Constant lookup table of `transform_1d_avx2` entries, indexed by
// [TX_SIZES][ITX_TYPES_1D][4].
// NOTE(review): initializer is elided; the last dimension presumably selects
// among the low1/low8/low16/full kernel variants according to how many
// coefficients are non-zero -- confirm the index mapping and which entries
// are NULL.
static const transform_1d_avx2
highbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = …;
// Takes read-only 32-bit coefficients `input`, a 16-bit pixel buffer
// `output` with row pitch `stride`, the transform type and size, the
// end-of-block position `eob`, and bit depth `bd`.
// NOTE(review): body is elided; presumably performs the 2-D inverse
// transform for non-identity transform types and adds the result into
// `output` -- confirm how `eob` is used to select kernels.
static void highbd_inv_txfm2d_add_no_identity_avx2(const int32_t *input,
uint16_t *output, int stride,
TX_TYPE tx_type,
TX_SIZE tx_size, int eob,
const int bd) { … }
// Same parameter list as highbd_inv_txfm2d_add_no_identity_avx2 except that
// `output` is declared as uint8_t * rather than uint16_t *.
// NOTE(review): body is elided; presumably dispatches on `tx_type` between
// the AVX2 non-identity path and another implementation for identity
// types, converting `output` to a 16-bit pointer -- confirm.
static void av1_highbd_inv_txfm2d_add_universe_avx2(const int32_t *input,
uint8_t *output, int stride,
TX_TYPE tx_type,
TX_SIZE tx_size, int eob,
const int bd) { … }
// The only function in this view with external linkage. Takes read-only
// coefficients `input`, a destination buffer `dest` with row pitch
// `stride`, and a read-only `txfm_param` describing the transform.
// NOTE(review): body is elided; presumably reads the transform type, size,
// eob and bit depth from `txfm_param` and forwards to the static helpers
// above -- confirm which sizes, if any, fall back to another implementation.
void av1_highbd_inv_txfm_add_avx2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) { … }