#ifndef AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
#define AOM_AV1_COMMON_CDEF_BLOCK_SIMD_H_
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "av1/common/cdef_block.h"
static inline v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1,
v128 const2) { … }
static inline v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) { … }
static inline v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) { … }
static inline void array_reverse_transpose_8x8(v128 *in, v128 *res) { … }
int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
int coeff_shift) { … }
#if defined(_MSC_VER) && defined(_M_IX86)
#define CDEF_INLINE …
#else
#define CDEF_INLINE …
#endif
CDEF_INLINE v256 constrain16(v256 a, v256 b, unsigned int threshold,
unsigned int adjdamp) { … }
SIMD_INLINE v256 get_max_primary(const int is_lowbd, v256 *tap, v256 max,
v256 cdef_large_value_mask) { … }
SIMD_INLINE v256 get_max_secondary(const int is_lowbd, v256 *tap, v256 max,
v256 cdef_large_value_mask) { … }
#if defined(_MSC_VER) && !defined(__clang__)
#pragma optimize("", off)
#endif
CDEF_INLINE void filter_block_4x4(const int is_lowbd, void *dest, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir, int pri_damping,
int sec_damping, int coeff_shift, int height,
int enable_primary, int enable_secondary) { … }
CDEF_INLINE void filter_block_8x8(const int is_lowbd, void *dest, int dstride,
const uint16_t *in, int pri_strength,
int sec_strength, int dir, int pri_damping,
int sec_damping, int coeff_shift, int height,
int enable_primary, int enable_secondary) { … }
#if defined(_MSC_VER) && !defined(__clang__)
#pragma optimize("", on)
#endif
SIMD_INLINE void copy_block_4xh(const int is_lowbd, void *dest, int dstride,
const uint16_t *in, int height) { … }
SIMD_INLINE void copy_block_8xh(const int is_lowbd, void *dest, int dstride,
const uint16_t *in, int height) { … }
void SIMD_FUNC(cdef_filter_8_0)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_8_1)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_8_2)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_8_3)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_16_0)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_16_1)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_16_2)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
void SIMD_FUNC(cdef_filter_16_3)(void *dest, int dstride, const uint16_t *in,
int pri_strength, int sec_strength, int dir,
int pri_damping, int sec_damping,
int coeff_shift, int block_width,
int block_height) { … }
#if CONFIG_AV1_HIGHBITDEPTH
/* Copies a width x height rectangle of 16-bit samples from src to dst.
 *
 * dstride and sstride are row pitches measured in uint16_t elements (they are
 * used directly as element indices). Within each row, columns are moved eight
 * samples at a time through one unaligned v128 load/store pair; whatever is
 * left over when width is not a multiple of 8 is copied sample by sample.
 */
void SIMD_FUNC(cdef_copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
                                               const uint16_t *src, int sstride,
                                               int width, int height) {
  /* Largest multiple of 8 not exceeding width: end of the vectorised span. */
  const int vec_cols = width & ~0x7;

  for (int row = 0; row < height; ++row) {
    const int src_row = row * sstride;
    const int dst_row = row * dstride;
    int col = 0;

    /* Bulk part: 8 x uint16_t per iteration. */
    while (col < vec_cols) {
      const v128 chunk = v128_load_unaligned(&src[src_row + col]);
      v128_store_unaligned(&dst[dst_row + col], chunk);
      col += 8;
    }

    /* Tail part: the remaining (width % 8) samples of this row. */
    while (col < width) {
      dst[dst_row + col] = src[src_row + col];
      ++col;
    }
  }
}
#endif
#undef CDEF_INLINE
#endif