/* * Copyright (c) 2018, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ #include <immintrin.h> #include <assert.h> #include "config/av1_rtcd.h" #include "av1/common/convolve.h" #include "aom_dsp/aom_dsp_common.h" #include "aom_dsp/aom_filter.h" #include "aom_dsp/x86/convolve_avx2.h" #include "aom_dsp/x86/synonyms.h" #include "aom_dsp/x86/synonyms_avx2.h" // 128-bit xmmwords are written as [ ... ] with the MSB on the left. // 256-bit ymmwords are written as two xmmwords, [ ... ][ ... ] with the MSB // on the left. // A row of, say, 8-bit pixels with values p0, p1, p2, ..., p30, p31 will be // loaded and stored as [ p31 ... p17 p16 ][ p15 ... p1 p0 ]. // Exploiting the range of wiener filter coefficients, // horizontal filtering can be done in 16 bit intermediate precision. // The details are as follows : // Consider the horizontal wiener filter coefficients of the following form : // [C0, C1, C2, 2^(FILTER_BITS) -2 * (C0 + C1 + C2), C2, C1, C0] // Subtracting 2^(FILTER_BITS) from the centre tap we get the following : // [C0, C1, C2, -2 * (C0 + C1 + C2), C2, C1, C0] // The sum of the product "C0 * p0 + C1 * p1 + C2 * p2 -2 * (C0 + C1 + C2) * p3 // + C2 * p4 + C1 * p5 + C0 * p6" would be in the range of signed 16 bit // precision. Finally, after rounding the above result by round_0, we multiply // the centre pixel by 2^(FILTER_BITS - round_0) and add it to get the // horizontal filter output. 
// AVX2 entry point for the Wiener "convolve, add source" filter (going by the
// function name and the file-level comment above). The body is not visible in
// this view, so the notes below are taken from the signature and that comment
// only; anything marked NOTE(review) is an assumption to confirm.
//
// Parameters:
//   src, src_stride  - read-only 8-bit input pixels and the row stride
//                      (NOTE(review): stride presumably in bytes/pixels per
//                      row - confirm).
//   dst, dst_stride  - writable 8-bit output buffer and its row stride.
//   filter_x         - 16-bit horizontal filter coefficients. The file-level
//                      comment describes a symmetric 7-tap layout
//                      [C0, C1, C2, centre, C2, C1, C0].
//   filter_y         - 16-bit vertical filter coefficients
//                      (NOTE(review): presumably the same symmetric layout as
//                      filter_x - confirm).
//   x_step_q4,
//   y_step_q4        - horizontal / vertical step values
//                      (NOTE(review): the "_q4" suffix suggests 1/16-pel fixed
//                      point, and this kind of routine usually requires an
//                      unscaled step - confirm against the body and callers).
//   w, h             - NOTE(review): presumably the block width and height in
//                      pixels - confirm any multiple-of-N requirements.
//   conv_params      - read-only WienerConvolveParams. The file-level comment
//                      refers to a "round_0" rounding amount applied after the
//                      horizontal pass (NOTE(review): presumably a field of
//                      this struct - confirm).
//
// Returns nothing (void). NOTE(review): presumably the result is written
// through dst - confirm.
void av1_wiener_convolve_add_src_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const WienerConvolveParams *conv_params) { … }