#include <emmintrin.h>
#include "vpx_dsp/fwd_txfm.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#if DCT_HIGH_BIT_DEPTH
#define ADD_EPI16 …
#define SUB_EPI16 …
#if FDCT32x32_HIGH_PRECISION
/* C fallback for the second pass of the 32x32 forward transform
 * (high-precision variant).
 *
 * For each index `row` in 0..31 this gathers the 32 values found at
 * intermediate[k * 32 + row] (one column of a 32-wide row-major buffer),
 * runs vpx_fdct32() on them, and stores the 32 results contiguously at
 * out[row * 32 .. row * 32 + 31]. Each result is scaled down by 4 using
 * (v + 1 + (v < 0)) >> 2 before being narrowed to tran_low_t.
 *
 * intermediate: 32x32 block of first-pass results (1024 int16_t values read).
 * out:          destination for 1024 coefficients.
 */
static void vpx_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
  int row;
  for (row = 0; row < 32; ++row) {
    tran_high_t temp_in[32], temp_out[32];
    tran_low_t *const dst = out + row * 32;
    int k;

    /* Strided gather: element `row` of every 32-entry line. */
    for (k = 0; k < 32; ++k) {
      temp_in[k] = intermediate[k * 32 + row];
    }

    /* Third argument is 0 here; presumably it selects vpx_fdct32's
     * non-rounding mode -- confirm against its declaration. */
    vpx_fdct32(temp_in, temp_out, 0);

    for (k = 0; k < 32; ++k) {
      const tran_high_t v = temp_out[k];
      /* Negative inputs get an extra +1 before the shift. */
      dst[k] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
    }
  }
}
#define HIGH_FDCT32x32_2D_C …
#define HIGH_FDCT32x32_2D_ROWS_C …
#else
/* C fallback for the second pass of the 32x32 forward transform
 * (variant used when FDCT32x32_HIGH_PRECISION is false).
 *
 * For each index `row` in 0..31 this gathers the 32 values found at
 * intermediate[k * 32 + row], runs vpx_fdct32() on them with its third
 * argument set to 1, and stores the results, narrowed to tran_low_t with no
 * further scaling, at out[row * 32 .. row * 32 + 31].
 *
 * intermediate: 32x32 block of first-pass results (1024 int16_t values read).
 * out:          destination for 1024 coefficients.
 */
static void vpx_fdct32x32_rd_rows_c(const int16_t *intermediate,
                                    tran_low_t *out) {
  int row;
  for (row = 0; row < 32; ++row) {
    tran_high_t temp_in[32], temp_out[32];
    const int16_t *const src = intermediate + row;
    tran_low_t *const dst = out + row * 32;
    int k;

    /* Strided gather: element `row` of every 32-entry line. */
    for (k = 0; k < 32; ++k) {
      temp_in[k] = src[k * 32];
    }

    /* Third argument is 1 here; presumably it asks vpx_fdct32 to do the
     * intermediate rounding itself -- confirm against its declaration. */
    vpx_fdct32(temp_in, temp_out, 1);

    for (k = 0; k < 32; ++k) {
      dst[k] = (tran_low_t)temp_out[k];
    }
  }
}
#define HIGH_FDCT32x32_2D_C …
#define HIGH_FDCT32x32_2D_ROWS_C …
#endif
#else
#define ADD_EPI16 …
#define SUB_EPI16 …
#endif
/* Entry point generated from this template.
 *
 * NOTE(review): FDCT32x32_2D, FDCT32x32_HIGH_PRECISION and DCT_HIGH_BIT_DEPTH
 * are not defined in this file; presumably the including source defines them
 * before pulling this file in -- confirm against the includer. The helper
 * macros ADD_EPI16, SUB_EPI16, HIGH_FDCT32x32_2D_C and
 * HIGH_FDCT32x32_2D_ROWS_C are #undef'd right after this function.
 *
 * input:      pointer to the int16_t source samples.
 * output_org: pointer to the tran_low_t destination buffer.
 * stride:     presumably the distance between consecutive input rows, in
 *             elements -- TODO confirm against callers.
 */
void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) { … }
#undef ADD_EPI16
#undef SUB_EPI16
#undef HIGH_FDCT32x32_2D_C
#undef HIGH_FDCT32x32_2D_ROWS_C