// chromium/third_party/libaom/source/libaom/aom_dsp/fft_common.h

/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_FFT_COMMON_H_
#define AOM_AOM_DSP_FFT_COMMON_H_

#ifdef __cplusplus
extern "C" {
#endif

/*!\brief A function pointer for computing 1d fft and ifft.
 *
 * The function will point to an implementation for a specific transform size,
 * and may perform the transforms using vectorized instructions.
 *
 * For a non-vectorized forward transform of size n, the input and output
 * buffers will be size n. The output takes advantage of conjugate symmetry and
 * packs the results as: [r_0, r_1, ..., r_{n/2}, i_1, ..., i_{n/2-1}], where
 * (r_{j}, i_{j}) is the complex output for index j.
 *
 * An inverse transform will assume that the complex "input" is packed
 * similarly. Its output will be real.
 *
 * Non-vectorized transforms (e.g., on a single row) would use a stride = 1.
 *
 * Vectorized implementations are parallelized along the columns so that the fft
 * can be performed on multiple columns at a time. In such cases the data block
 * for input and output is typically square (n x n) and the stride will
 * correspond to the spacing between rows. At minimum, the input size must be
 * n x simd_vector_length.
 *
 * \param[in]  input   Input buffer. See above for size restrictions.
 * \param[out] output  Output buffer. See above for size restrictions.
 * \param[in]  stride  The spacing in number of elements between rows
 *                     (or elements)
 */
typedef void (*aom_fft_1d_func_t)(const float *input, float *output,
                                  int stride);

// Non-vectorized 1d transforms (forward "fft" and inverse "ifft") for sizes
// 2, 4, 8, 16 and 32. They are declared here because some of the vectorized
// implementations make use of them.
void aom_fft1d_2_float(const float *input, float *output, int stride);
void aom_ifft1d_2_float(const float *input, float *output, int stride);

void aom_fft1d_4_float(const float *input, float *output, int stride);
void aom_ifft1d_4_float(const float *input, float *output, int stride);

void aom_fft1d_8_float(const float *input, float *output, int stride);
void aom_ifft1d_8_float(const float *input, float *output, int stride);

void aom_fft1d_16_float(const float *input, float *output, int stride);
void aom_ifft1d_16_float(const float *input, float *output, int stride);

void aom_fft1d_32_float(const float *input, float *output, int stride);
void aom_ifft1d_32_float(const float *input, float *output, int stride);

/*!\brief Function pointer for transposing a matrix of floats.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_transpose_func_t)(const float *input, float *output,
                                         int n);

/*!\brief Function pointer for re-arranging intermediate 2d transform results.
 *
 * After re-arrangement, the real and imaginary components will be packed
 * tightly next to each other.
 *
 * \param[in]  input  Input buffer (size n x n)
 * \param[out] output Output buffer (size 2 x n x n)
 * \param[in]  n      Extent of one dimension of the square matrix.
 */
typedef void (*aom_fft_unpack_func_t)(const float *input, float *output,
                                      int n);

/*!\brief Performs a 2d fft with the given functions.
 *
 * This generator function allows for multiple different implementations of 2d
 * fft with different vector operations, without having to redefine the main
 * body multiple times.
 *
 * \param[in]  input     Input buffer to run the transform on (size n x n)
 * \param[out] temp      Working buffer for computing the transform (size n x n)
 * \param[out] output    Output buffer (size 2 x n x n)
 * \param[in]  n         Extent of one dimension of the square (n x n) input
 * \param[in]  tform     Forward transform function
 * \param[in]  transpose Transpose function (for n x n matrix)
 * \param[in]  unpack    Unpack function used to massage outputs to correct form
 * \param[in]  vec_size  Vector size (the transform is done vec_size units at
 *                       a time)
 */
void aom_fft_2d_gen(const float *input, float *temp, float *output, int n,
                    aom_fft_1d_func_t tform, aom_fft_transpose_func_t transpose,
                    aom_fft_unpack_func_t unpack, int vec_size);

/*!\brief Perform a 2d inverse fft with the given helper functions
 *
 * \param[in]  input      Input buffer to run the transform on (size 2 x n x n)
 * \param[out] temp       Working buffer for computations (size 2 x n x n)
 * \param[out] output     Output buffer (size n x n)
 * \param[in]  n          Extent of one dimension of the square (n x n) output
 * \param[in]  fft_single Forward transform function (non vectorized)
 * \param[in]  fft_multi  Forward transform function (vectorized)
 * \param[in]  ifft_multi Inverse transform function (vectorized)
 * \param[in]  transpose  Transpose function (for n x n matrix)
 * \param[in]  vec_size   Vector size (the transform is done vec_size
 *                        units at a time)
 */
void aom_ifft_2d_gen(const float *input, float *temp, float *output, int n,
                     aom_fft_1d_func_t fft_single, aom_fft_1d_func_t fft_multi,
                     aom_fft_1d_func_t ifft_multi,
                     aom_fft_transpose_func_t transpose, int vec_size);
#ifdef __cplusplus
}
#endif

// The macros below define 1D fft/ifft for different data types and for
// different simd vector intrinsic types.
//
// There is one macro per transform size (2, 4, 8, 16, 32) and direction
// (GEN_FFT_* / GEN_IFFT_*). The size-2 macros take only load/store in addition
// to (ret, suffix, T, T_VEC); the size-4 macros additionally take constant,
// add and sub; sizes 8 and above additionally take mul.
//
// NOTE(review): presumably `ret` is the return type / specifiers of the
// generated function, `suffix` the suffix appended to its name, `T` the scalar
// element type, `T_VEC` the vector type, and the remaining arguments the
// primitive operations on that vector type -- TODO confirm against the users
// of these macros.
//
// NOTE(review): as written here, every macro has an empty replacement list,
// so an invocation expands to nothing. The generator bodies appear to be
// missing from this copy of the header -- confirm before relying on them.

// Forward transform, size 2.
#define GEN_FFT_2(ret, suffix, T, T_VEC, load, store)

// Forward transform, size 4.
#define GEN_FFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub)

// Forward transform, size 8.
#define GEN_FFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, mul)

// Forward transform, size 16.
#define GEN_FFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
                   mul)

// Forward transform, size 32.
#define GEN_FFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub,   \
                   mul)

// Inverse transform, size 2.
#define GEN_IFFT_2(ret, suffix, T, T_VEC, load, store)

// Inverse transform, size 4.
#define GEN_IFFT_4(ret, suffix, T, T_VEC, load, store, constant, add, sub)

// Inverse transform, size 8.
#define GEN_IFFT_8(ret, suffix, T, T_VEC, load, store, constant, add, sub, \
                   mul)

// Inverse transform, size 16.
#define GEN_IFFT_16(ret, suffix, T, T_VEC, load, store, constant, add, sub,   \
                    mul)
// Inverse transform, size 32.
#define GEN_IFFT_32(ret, suffix, T, T_VEC, load, store, constant, add, sub,    \
                    mul)

#endif  // AOM_AOM_DSP_FFT_COMMON_H_