/* cdef_block_simd.h | Explore in Territory */

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
#define AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_

#include "config/aom_config.h"
#include "config/av1_rtcd.h"

#include "av1/common/cdef_block.h"

/* partial A is a 16-bit vector of the form:
   [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
   [0  y1 y2 y3 y4 y5 y6 y7].
   This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
   (x7^2+y7^2)*C7 + (x8^2+0^2)*C8 where the C1..C8 constants are in const1
   and const2. */
static inline v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1,
                                    v128 const2) { … }

static inline v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) { … }

/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
   to compute the remaining directions. */
static inline v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) { … }

/* transpose and reverse the order of the lines -- equivalent to a 90-degree
   counter-clockwise rotation of the pixels. */
static inline void array_reverse_transpose_8x8(v128 *in, v128 *res) { … }

int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
                             int coeff_shift) { … }

// Work around compiler out of memory issues with Win32 builds. This issue has
// been observed with Visual Studio 2017, 2019, and 2022 (version 17.10.3).
#if defined(_MSC_VER) && defined(_M_IX86)
#define CDEF_INLINE …
#else
#define CDEF_INLINE …
#endif

// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
CDEF_INLINE v256 constrain16(v256 a, v256 b, unsigned int threshold,
                             unsigned int adjdamp) { … }

SIMD_INLINE v256 get_max_primary(const int is_lowbd, v256 *tap, v256 max,
                                 v256 cdef_large_value_mask) { … }

SIMD_INLINE v256 get_max_secondary(const int is_lowbd, v256 *tap, v256 max,
                                   v256 cdef_large_value_mask) { … }

// MSVC takes far too much time optimizing these.
// https://bugs.chromium.org/p/aomedia/issues/detail?id=3395
#if defined(_MSC_VER) && !defined(__clang__)
#pragma optimize("", off)
#endif

CDEF_INLINE void filter_block_4x4(const int is_lowbd, void *dest, int dstride,
                                  const uint16_t *in, int pri_strength,
                                  int sec_strength, int dir, int pri_damping,
                                  int sec_damping, int coeff_shift, int height,
                                  int enable_primary, int enable_secondary) { … }

CDEF_INLINE void filter_block_8x8(const int is_lowbd, void *dest, int dstride,
                                  const uint16_t *in, int pri_strength,
                                  int sec_strength, int dir, int pri_damping,
                                  int sec_damping, int coeff_shift, int height,
                                  int enable_primary, int enable_secondary) { … }

#if defined(_MSC_VER) && !defined(__clang__)
#pragma optimize("", on)
#endif

SIMD_INLINE void copy_block_4xh(const int is_lowbd, void *dest, int dstride,
                                const uint16_t *in, int height) { … }

SIMD_INLINE void copy_block_8xh(const int is_lowbd, void *dest, int dstride,
                                const uint16_t *in, int height) { … }

void SIMD_FUNC(cdef_filter_8_0)(void *dest, int dstride, const uint16_t *in,
                                int pri_strength, int sec_strength, int dir,
                                int pri_damping, int sec_damping,
                                int coeff_shift, int block_width,
                                int block_height) { … }

void SIMD_FUNC(cdef_filter_8_1)(void *dest, int dstride, const uint16_t *in,
                                int pri_strength, int sec_strength, int dir,
                                int pri_damping, int sec_damping,
                                int coeff_shift, int block_width,
                                int block_height) { … }
void SIMD_FUNC(cdef_filter_8_2)(void *dest, int dstride, const uint16_t *in,
                                int pri_strength, int sec_strength, int dir,
                                int pri_damping, int sec_damping,
                                int coeff_shift, int block_width,
                                int block_height) { … }

void SIMD_FUNC(cdef_filter_8_3)(void *dest, int dstride, const uint16_t *in,
                                int pri_strength, int sec_strength, int dir,
                                int pri_damping, int sec_damping,
                                int coeff_shift, int block_width,
                                int block_height) { … }

void SIMD_FUNC(cdef_filter_16_0)(void *dest, int dstride, const uint16_t *in,
                                 int pri_strength, int sec_strength, int dir,
                                 int pri_damping, int sec_damping,
                                 int coeff_shift, int block_width,
                                 int block_height) { … }

void SIMD_FUNC(cdef_filter_16_1)(void *dest, int dstride, const uint16_t *in,
                                 int pri_strength, int sec_strength, int dir,
                                 int pri_damping, int sec_damping,
                                 int coeff_shift, int block_width,
                                 int block_height) { … }
void SIMD_FUNC(cdef_filter_16_2)(void *dest, int dstride, const uint16_t *in,
                                 int pri_strength, int sec_strength, int dir,
                                 int pri_damping, int sec_damping,
                                 int coeff_shift, int block_width,
                                 int block_height) { … }

void SIMD_FUNC(cdef_filter_16_3)(void *dest, int dstride, const uint16_t *in,
                                 int pri_strength, int sec_strength, int dir,
                                 int pri_damping, int sec_damping,
                                 int coeff_shift, int block_width,
                                 int block_height) { … }

#if CONFIG_AV1_HIGHBITDEPTH
/* Copies a width x height rectangle of 16-bit samples from src to dst.
 * dstride and sstride are the distances between consecutive rows of dst and
 * src, counted in uint16_t elements. Each row is transferred in groups of 8
 * samples through an unaligned v128 load/store pair; the remaining
 * (width % 8) samples of the row are copied one element at a time. */
void SIMD_FUNC(cdef_copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
                                               const uint16_t *src, int sstride,
                                               int width, int height) {
  // Width rounded down to a multiple of 8: the part handled with vectors.
  const int vec_width = width & ~0x7;
  for (int row = 0; row < height; ++row) {
    const int dst_base = row * dstride;
    const int src_base = row * sstride;
    int col = 0;
    // Vector part: 8 samples per iteration.
    while (col < vec_width) {
      v128_store_unaligned(&dst[dst_base + col],
                           v128_load_unaligned(&src[src_base + col]));
      col += 8;
    }
    // Scalar tail: columns left over after the last full group of 8.
    while (col < width) {
      dst[dst_base + col] = src[src_base + col];
      ++col;
    }
  }
}
#endif  // CONFIG_AV1_HIGHBITDEPTH

#undef CDEF_INLINE

#endif  // AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
// chromium/third_party/libaom/source/libaom/av1/common/cdef_block_simd.h