/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "aom_dsp/aom_simd.h"
/* SIMD_FUNC is defined before cdef_block_simd.h is included.
   NOTE(review): presumably that header uses it to derive the names of the
   functions it defines -- confirm against the header. */
#define SIMD_FUNC(name) …
#include "av1/common/cdef_block_simd.h"

/* partial A is a 16-bit vector of the form:
   [x8 - - x1 | x16 - - x9] and partial B has the form:
   [0  y1 - y7 | 0 y9 - y15].
   This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
   (x7^2+y7^2)*C7 + (x8^2+0^2)*C8 on each 128-bit lane. Here the C1..C8
   constants are in const1 and const2. */
static inline __m256i fold_mul_and_sum_avx2(__m256i *partiala, __m256i *partialb, const __m256i *const1, const __m256i *const2) { … }

/* NOTE(review): the body is not visible in this view. Going by the name only,
   this presumably reduces the four input vectors x0..x3 with horizontal sums
   into a single __m256i -- confirm against the implementation. */
static inline __m256i hsum4_avx2(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3) { … }

/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
   to compute the remaining directions.
   NOTE: the parameter name `cost_frist_8x8` is a misspelling of "first"; it is
   left unchanged here because renaming it is a code change.
   NOTE(review): presumably the two 4-entry arrays receive the four direction
   costs of the first and of the second 8x8 block respectively -- confirm. */
static inline __m256i compute_directions_avx2(__m256i *lines, int32_t cost_frist_8x8[4], int32_t cost_second_8x8[4]) { … }

/* Transpose and reverse the order of the lines -- equivalent to a 90-degree
   counter-clockwise rotation of the pixels. Reads from `in` and writes the
   result to `res`. */
static inline void array_reverse_transpose_8x8_avx2(__m256i *in, __m256i *res) { … }

/* NOTE(review): the body is not visible in this view. Judging by the
   signature, this presumably runs the CDEF direction search on two 8x8 blocks
   at once (img1 and img2, sharing one `stride`), writing a variance value to
   var_out_1st / var_out_2nd and the selected direction to out_dir_1st_8x8 /
   out_dir_2nd_8x8. The role of `coeff_shift` cannot be determined from here.
   Confirm all of this against the implementation. */
void cdef_find_dir_dual_avx2(const uint16_t *img1, const uint16_t *img2, int stride, int32_t *var_out_1st, int32_t *var_out_2nd, int coeff_shift, int *out_dir_1st_8x8, int *out_dir_2nd_8x8) { … }

/* NOTE(review): the body is not visible in this view. Judging by the name and
   signature, this presumably copies a `width` x `height` rectangle of 8-bit
   samples from `src` (row pitch `sstride`) into the 16-bit buffer `dst`
   (row pitch `dstride`), widening each sample -- confirm, including any
   constraint on `width` implied by the "rect8" in the name. */
void cdef_copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int dstride, const uint8_t *src, int sstride, int width, int height) { … }