chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h

/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "vpx_dsp/fwd_txfm.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

// TODO(jingning) The high bit-depth version needs re-work for performance.
// The current SSE2 implementation also causes cross reference to the static
// functions in the C implementation file.
// Configuration for builds where DCT_HIGH_BIT_DEPTH is non-zero.
// ADD_EPI16 and SUB_EPI16 are defined here with empty replacement lists, so
// any use of them in this file expands to nothing.
// NOTE(review): the names suggest they are meant to select 16-bit SSE2
// add/subtract intrinsics; the empty definitions look like the values were
// dropped -- confirm against upstream before relying on them.
#if DCT_HIGH_BIT_DEPTH
#define ADD_EPI16
#define SUB_EPI16
// Within the high bit-depth build, FDCT32x32_HIGH_PRECISION chooses which of
// the two C helper functions below is compiled.
#if FDCT32x32_HIGH_PRECISION
// C helper compiled when both DCT_HIGH_BIT_DEPTH and
// FDCT32x32_HIGH_PRECISION are enabled.
//
// For each r in [0, 32):
//   1. gathers the 32 values intermediate[r], intermediate[r + 32], ...,
//      intermediate[r + 31 * 32] (stride 32) into a local buffer,
//   2. runs vpx_fdct32() on that buffer with 0 as its third argument,
//   3. stores the 32 results to out[r * 32 .. r * 32 + 31], each one reduced
//      as (v + 1 + (v < 0)) >> 2.
//
// intermediate: 32 * 32 int16_t values, read only.
// out:          32 * 32 tran_low_t values, fully overwritten.
static void vpx_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
  int r;
  for (r = 0; r < 32; ++r) {
    tran_high_t gathered[32], transformed[32];
    tran_low_t *const dst = out + r * 32;
    int k;

    // Strided gather: element k of this pass lives 32 entries after k - 1.
    for (k = 0; k < 32; ++k) {
      gathered[k] = intermediate[r + 32 * k];
    }

    vpx_fdct32(gathered, transformed, 0);

    for (k = 0; k < 32; ++k) {
      const tran_high_t v = transformed[k];
      // Negative values get an extra +1 before the shift by 2.
      dst[k] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
    }
  }
}
// High-precision branch: both macros are defined with empty replacement
// lists here.
// NOTE(review): the names suggest they should name the C fallback functions
// for the 2D transform and its row pass -- the empty values look incomplete;
// confirm against upstream.
#define HIGH_FDCT32x32_2D_C
#define HIGH_FDCT32x32_2D_ROWS_C
// FDCT32x32_HIGH_PRECISION is zero/undefined from here to the matching #endif.
#else
// C helper compiled when DCT_HIGH_BIT_DEPTH is enabled and
// FDCT32x32_HIGH_PRECISION is not.
//
// For each r in [0, 32):
//   1. gathers the 32 values intermediate[r], intermediate[r + 32], ...,
//      intermediate[r + 31 * 32] (stride 32) into a local buffer,
//   2. runs vpx_fdct32() on that buffer with 1 as its third argument,
//   3. stores the 32 results, cast to tran_low_t and otherwise unmodified, to
//      out[r * 32 .. r * 32 + 31].
//
// intermediate: 32 * 32 int16_t values, read only.
// out:          32 * 32 tran_low_t values, fully overwritten.
static void vpx_fdct32x32_rd_rows_c(const int16_t *intermediate,
                                    tran_low_t *out) {
  int r;
  for (r = 0; r < 32; ++r) {
    tran_high_t gathered[32], transformed[32];
    tran_low_t *const dst = out + r * 32;
    int k;

    // Strided gather: element k of this pass lives 32 entries after k - 1.
    for (k = 0; k < 32; ++k) {
      gathered[k] = intermediate[r + 32 * k];
    }

    vpx_fdct32(gathered, transformed, 1);

    // No post-scaling in this variant; results are only narrowed.
    for (k = 0; k < 32; ++k) {
      dst[k] = (tran_low_t)transformed[k];
    }
  }
}
// Non-high-precision branch: both macros are defined with empty replacement
// lists here.
// NOTE(review): the names suggest they should name the C fallback functions
// for the 2D transform and its row pass -- the empty values look incomplete;
// confirm against upstream.
#define HIGH_FDCT32x32_2D_C
#define HIGH_FDCT32x32_2D_ROWS_C
#endif  // FDCT32x32_HIGH_PRECISION
#else
// Builds where DCT_HIGH_BIT_DEPTH is zero/undefined: only ADD_EPI16 and
// SUB_EPI16 are defined (again with empty replacement lists); the
// HIGH_FDCT32x32_2D_* macros and the C helper functions are not.
// NOTE(review): these definitions are identical to the high bit-depth ones,
// which makes the #if/#else distinction a no-op for them -- verify.
#define ADD_EPI16
#define SUB_EPI16
#endif  // DCT_HIGH_BIT_DEPTH

// Externally visible entry point taking (input, output_org, stride).
// In this file the body is empty: none of the parameters is read and nothing
// is written through output_org.
// NOTE(review): FDCT32x32_2D is spelled like a macro; presumably the including
// file #defines it to the concrete function name -- confirm. The empty body
// looks like a placeholder for the SSE2 transform; verify against upstream.
void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) {}  // NOLINT

// Remove the helper macros defined earlier in this file so they do not leak
// past it. #undef on a name that is not currently defined is harmless, so
// this is safe even when the HIGH_FDCT32x32_2D_* macros were never defined.
// NOTE(review): presumably this also allows the file to be included more than
// once with different settings -- confirm against the including sources.
#undef ADD_EPI16
#undef SUB_EPI16
#undef HIGH_FDCT32x32_2D_C
#undef HIGH_FDCT32x32_2D_ROWS_C