inv_txfm_sse2.c | Explore in Territory

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

static INLINE void transpose_16bit_4(__m128i *res) { … }

void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest,
                             int stride) { … }

void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest,
                            int stride) { … }

void idct4_sse2(__m128i *const in) { … }

void iadst4_sse2(__m128i *const in) { … }

static INLINE void load_buffer_8x8(const tran_low_t *const input,
                                   __m128i *const in) { … }

void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest,
                             int stride) { … }

void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest,
                             int stride) { … }

static INLINE void recon_and_store_8_dual(uint8_t *const dest,
                                          const __m128i in_x,
                                          const int stride) { … }

void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
                            int stride) { … }

void vpx_idct8_sse2(__m128i *const in) { … }

void iadst8_sse2(__m128i *const in) { … }

static INLINE void idct16_load8x8(const tran_low_t *const input,
                                  __m128i *const in) { … }

void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
                                int stride) { … }

void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest,
                               int stride) { … }

void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
                               int stride) { … }

static INLINE void recon_and_store_16(uint8_t *const dest, const __m128i in_x) { … }

void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
                              int stride) { … }

void vpx_iadst16_8col_sse2(__m128i *const in) { … }

void idct16_sse2(__m128i *const in0, __m128i *const in1) { … }

void iadst16_sse2(__m128i *const in0, __m128i *const in1) { … }

// Group the coefficient calculation into smaller functions to prevent stack
// spillover in 32x32 idct optimizations:
// quarter_1: 0-7
// quarter_2: 8-15
// quarter_3_4: 16-23, 24-31

// For each 8x32 block __m128i in[32],
// Input with index, 0, 4
// output pixels: 0-7 in __m128i out[32]
static INLINE void idct32_34_8x32_quarter_1(const __m128i *const in /*in[32]*/,
                                            __m128i *const out /*out[8]*/) { … }

// For each 8x32 block __m128i in[32],
// Input with index, 2, 6
// output pixels: 8-15 in __m128i out[32]
static INLINE void idct32_34_8x32_quarter_2(const __m128i *const in /*in[32]*/,
                                            __m128i *const out /*out[16]*/) { … }

static INLINE void idct32_34_8x32_quarter_1_2(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { … }

// For each 8x32 block __m128i in[32],
// Input with odd index, 1, 3, 5, 7
// output pixels: 16-23, 24-31 in __m128i out[32]
static INLINE void idct32_34_8x32_quarter_3_4(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { … }

void idct32_34_8x32_sse2(const __m128i *const in /*in[32]*/,
                         __m128i *const out /*out[32]*/) { … }

// Only upper-left 8x8 has non-zero coeff
void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
                               int stride) { … }

// For each 8x32 block __m128i in[32],
// Input with index, 0, 4, 8, 12, 16, 20, 24, 28
// output pixels: 0-7 in __m128i out[32]
static INLINE void idct32_1024_8x32_quarter_1(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { … }

// For each 8x32 block __m128i in[32],
// Input with index, 2, 6, 10, 14, 18, 22, 26, 30
// output pixels: 8-15 in __m128i out[32]
static INLINE void idct32_1024_8x32_quarter_2(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[16]*/) { … }

static INLINE void idct32_1024_8x32_quarter_1_2(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { … }

// For each 8x32 block __m128i in[32],
// Input with odd index,
// 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
// output pixels: 16-23, 24-31 in __m128i out[32]
static INLINE void idct32_1024_8x32_quarter_3_4(
    const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { … }

void idct32_1024_8x32(const __m128i *const in /*in[32]*/,
                      __m128i *const out /*out[32]*/) { … }

void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest,
                                 int stride) { … }

void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest,
                                int stride) { … }

void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest,
                              int stride) { … }
chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c