highbd_fwd_txfm_avx2.c | Explore in Territory

/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <assert.h>
#include <immintrin.h> /*AVX2*/

#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "av1/common/av1_txfm.h"
#include "av1/encoder/av1_fwd_txfm1d_cfg.h"
#include "aom_dsp/txfm_common.h"
#include "aom_ports/mem.h"
#include "aom_dsp/x86/txfm_common_sse2.h"
#include "aom_dsp/x86/txfm_common_avx2.h"

static inline void load_buffer_8x8_avx2(const int16_t *input, __m256i *out,
                                        int stride, int flipud, int fliplr,
                                        int shift) { … }
static inline void col_txfm_8x8_rounding(__m256i *in, int shift) { … }
static inline void load_buffer_8x16_avx2(const int16_t *input, __m256i *out,
                                         int stride, int flipud, int fliplr,
                                         int shift) { … }
static inline void load_buffer_16xn_avx2(const int16_t *input, __m256i *out,
                                         int stride, int height, int outstride,
                                         int flipud, int fliplr) { … }

static void fwd_txfm_transpose_8x8_avx2(const __m256i *in, __m256i *out,
                                        const int instride,
                                        const int outstride) { … }
static inline void round_shift_32_8xn_avx2(__m256i *in, int size, int bit,
                                           int stride) { … }
static inline void store_buffer_avx2(const __m256i *const in, int32_t *out,
                                     const int stride, const int out_size) { … }
static inline void fwd_txfm_transpose_16x16_avx2(const __m256i *in,
                                                 __m256i *out) { … }

static inline __m256i av1_half_btf_avx2(const __m256i *w0, const __m256i *n0,
                                        const __m256i *w1, const __m256i *n1,
                                        const __m256i *rounding, int bit) { … }
#define btf_32_avx2_type0(w0, w1, in0, in1, out0, out1, bit) …

#define btf_32_type0_avx2_new(ww0, ww1, in0, in1, out0, out1, r, bit) …

transform_1d_avx2;
static void fdct8_avx2(__m256i *in, __m256i *out, const int8_t bit,
                       const int col_num, const int outstride) { … }
static void fadst8_avx2(__m256i *in, __m256i *out, const int8_t bit,
                        const int col_num, const int outstirde) { … }
static void idtx8_avx2(__m256i *in, __m256i *out, const int8_t bit, int col_num,
                       int outstride) { … }
void av1_fwd_txfm2d_8x8_avx2(const int16_t *input, int32_t *coeff, int stride,
                             TX_TYPE tx_type, int bd) { … }

static void fdct16_avx2(__m256i *in, __m256i *out, const int8_t bit,
                        const int col_num, const int outstride) { … }
static void fadst16_avx2(__m256i *in, __m256i *out, const int8_t bit,
                         const int num_cols, const int outstride) { … }
static void idtx16_avx2(__m256i *in, __m256i *out, const int8_t bit,
                        int col_num, const int outstride) { … }
static const transform_1d_avx2 col_highbd_txfm8x16_arr[TX_TYPES] = …;
static const transform_1d_avx2 row_highbd_txfm8x8_arr[TX_TYPES] = …;
void av1_fwd_txfm2d_8x16_avx2(const int16_t *input, int32_t *coeff, int stride,
                              TX_TYPE tx_type, int bd) { … }
static const transform_1d_avx2 col_highbd_txfm8x8_arr[TX_TYPES] = …;
static const transform_1d_avx2 row_highbd_txfm8x16_arr[TX_TYPES] = …;
void av1_fwd_txfm2d_16x8_avx2(const int16_t *input, int32_t *coeff, int stride,
                              TX_TYPE tx_type, int bd) { … }
void av1_fwd_txfm2d_16x16_avx2(const int16_t *input, int32_t *coeff, int stride,
                               TX_TYPE tx_type, int bd) { … }
static inline void fdct32_avx2(__m256i *input, __m256i *output,
                               const int8_t cos_bit, const int instride,
                               const int outstride) { … }
static inline void idtx32x32_avx2(__m256i *input, __m256i *output,
                                  const int8_t cos_bit, int instride,
                                  int outstride) { … }
static const transform_1d_avx2 col_txfm8x32_arr[TX_TYPES] = …;
static const transform_1d_avx2 row_txfm8x32_arr[TX_TYPES] = …;
void av1_fwd_txfm2d_32x32_avx2(const int16_t *input, int32_t *output,
                               int stride, TX_TYPE tx_type, int bd) { … }
static inline void fdct64_stage2_avx2(__m256i *x1, __m256i *x2,
                                      __m256i *cospi_m32, __m256i *cospi_p32,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage3_avx2(__m256i *x2, __m256i *x3,
                                      __m256i *cospi_m32, __m256i *cospi_p32,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage4_avx2(__m256i *x3, __m256i *x4,
                                      __m256i *cospi_m32, __m256i *cospi_p32,
                                      __m256i *cospi_m16, __m256i *cospi_p48,
                                      __m256i *cospi_m48,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage5_avx2(__m256i *x4, __m256i *x5,
                                      __m256i *cospi_m32, __m256i *cospi_p32,
                                      __m256i *cospi_m16, __m256i *cospi_p48,
                                      __m256i *cospi_m48,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage6_avx2(
    __m256i *x5, __m256i *x6, __m256i *cospi_p16, __m256i *cospi_p32,
    __m256i *cospi_m16, __m256i *cospi_p48, __m256i *cospi_m48,
    __m256i *cospi_m08, __m256i *cospi_p56, __m256i *cospi_m56,
    __m256i *cospi_m40, __m256i *cospi_p24, __m256i *cospi_m24,
    const __m256i *__rounding, int8_t cos_bit) { … }
static inline void fdct64_stage7_avx2(__m256i *x6, __m256i *x7,
                                      __m256i *cospi_p08, __m256i *cospi_p56,
                                      __m256i *cospi_p40, __m256i *cospi_p24,
                                      __m256i *cospi_m08, __m256i *cospi_m56,
                                      __m256i *cospi_m40, __m256i *cospi_m24,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage8_avx2(__m256i *x7, __m256i *x8,
                                      const int32_t *cospi,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage9_avx2(__m256i *x8, __m256i *x9,
                                      const int32_t *cospi,
                                      const __m256i *__rounding,
                                      int8_t cos_bit) { … }
static inline void fdct64_stage10_avx2(__m256i *x9, __m256i *x10,
                                       const int32_t *cospi,
                                       const __m256i *__rounding,
                                       int8_t cos_bit) { … }
static void fdct64_avx2(__m256i *input, __m256i *output, int8_t cos_bit,
                        const int instride, const int outstride) { … }
void av1_fwd_txfm2d_64x64_avx2(const int16_t *input, int32_t *output,
                               int stride, TX_TYPE tx_type, int bd) { … }
chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_fwd_txfm_avx2.c