/* * Copyright © 2018-2019, VideoLAN and dav1d authors * Copyright © 2018-2019, Two Orioles, LLC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "config.h"

#include <stddef.h>
#include <stdint.h>

#include "common/intops.h"

#include "src/itx_1d.h"

/* NOTE(review): the expansion of CLIP is elided in this excerpt; presumably it
 * clamps its argument to the min/max values the kernels below receive -
 * confirm against the full file. */
#define CLIP(a) …

/*
 * In some places, we use a pattern like this:
 * t2 = ((in1 * 1567 - in3 * (3784 - 4096) + 2048) >> 12) - in3;
 * even though the reference code might use something like:
 * t2 = (in1 * 1567 - in3 * 3784 + 2048) >> 12;
 *
 * The two forms yield the same value (given an arithmetic right shift): the
 * first one adds in3 * 4096 inside the shifted sum, which is exactly in3 after
 * the >> 12, and that is subtracted again outside the shift.
 *
 * The reason for this is that for 12 bits/component bitstreams (corrupt/
 * invalid ones, but they are codable nonetheless), each coefficient or
 * input can be 19(+sign) bits, and therefore if the combination of the
 * two multipliers (each 12 bits) is >= 4096, the result of the add/sub
 * after the pair of multiplies will exceed the 31+sign bit range. Signed
 * integer overflows are UB in C, and we'd like to prevent that.
 *
 * To work around this, we invert one of the two coefficients (or, if both are
 * multiples of 2, we reduce their magnitude by one bit). It should be noted
 * that SIMD implementations do not have to follow this exact behaviour. The
 * AV1 spec clearly states that the result of the multiply/add pairs should
 * fit in 31+sign bit intermediates, and that streams violating this convention
 * are not AV1-compliant. So, as long as we don't trigger UB (which some people
 * would consider a security vulnerability), we're fine. So, SIMD can simply
 * use the faster implementation, even if that might in some cases result in
 * integer overflows, since these are not considered valid AV1 anyway, and in
 * e.g. x86 assembly, integer overflows are not considered UB, but they merely
 * wrap around.
 */

/*
 * inv_dct{4,8,16,32,64} kernels.
 *
 * The 4, 8, 16 and 32 variants each come as a NOINLINE *_internal_c function
 * taking an extra tx64 argument, plus an entry point with the plain
 * (c, stride, min, max) signature. The 64 variant only has the plain entry
 * point.
 *
 * NOTE(review): the bodies are elided in this excerpt. Presumably c is
 * transformed in place with consecutive coefficients `stride` elements apart,
 * intermediates are clamped to [min, max], and the plain entry points forward
 * to the matching *_internal_c with tx64 disabled - confirm against the full
 * file.
 */
static NOINLINE void inv_dct4_1d_internal_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max, const int tx64) { … }

static void inv_dct4_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static NOINLINE void inv_dct8_1d_internal_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max, const int tx64) { … }

static void inv_dct8_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

/* NOTE(review): tx64 is a plain `int` here while the 4, 8 and 32 siblings
 * declare it `const int`. The body is not visible in this excerpt, so check
 * whether the parameter is really modified or whether this is just an
 * inconsistency. */
static NOINLINE void inv_dct16_1d_internal_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max, int tx64) { … }

static void inv_dct16_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static NOINLINE void inv_dct32_1d_internal_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max, const int tx64) { … }

static void inv_dct32_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static void inv_dct64_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

/*
 * inv_adst{4,8,16} internal kernels.
 *
 * Unlike the DCT kernels above, these take a read-only input (in, in_s) and a
 * separate output (out, out_s) instead of a single buffer.
 *
 * NOTE(review): in_s and out_s look like per-element strides for the input and
 * output respectively; the bodies are elided here - confirm.
 */
static NOINLINE void inv_adst4_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s, const int min, const int max, int32_t *const out, const ptrdiff_t out_s) { … }

static NOINLINE void inv_adst8_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s, const int min, const int max, int32_t *const out, const ptrdiff_t out_s) { … }

static NOINLINE void inv_adst16_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s, const int min, const int max, int32_t *const out, const ptrdiff_t out_s) { … }

/*
 * Helper macro that is instantiated three times and then undefined again.
 *
 * NOTE(review): both the expansion and the instantiation arguments are elided
 * in this excerpt. Presumably each instantiation generates the
 * (c, stride, min, max) entry points for one of the three ADST sizes on top of
 * the *_internal_c kernels above - confirm against the full file.
 */
#define inv_adst_1d …
inv_adst_1d
inv_adst_1d
inv_adst_1d
#undef inv_adst_1d

/*
 * inv_identity{4,8,16,32} kernels, sharing the (c, stride, min, max) signature
 * of the DCT entry points. Bodies are elided in this excerpt.
 */
static void inv_identity4_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static void inv_identity8_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static void inv_identity16_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

static void inv_identity32_1d_c(int32_t *const c, const ptrdiff_t stride, const int min, const int max) { … }

/*
 * Exported table of itx_1d_fn entries, dimensioned
 * [N_TX_SIZES][N_TX_1D_TYPES]. The initializer is elided in this excerpt;
 * presumably it maps each (size, 1-D type) pair to one of the static kernels
 * above - confirm.
 */
const itx_1d_fn dav1d_tx1d_fns[N_TX_SIZES][N_TX_1D_TYPES] = …;

/*
 * Exported table with N_TX_TYPES rows of two entries each. Entries are stored
 * as uint8_t but hold enum Tx1dType values, as the inline comment states.
 * NOTE(review): presumably the two entries are the 1-D types of the two passes
 * of a 2-D transform; which index is which is not visible here - confirm.
 */
const uint8_t /* enum Tx1dType */ dav1d_tx1d_types[N_TX_TYPES][2] = …;

/*
 * dav1d_inv_wht4_1d_c is the only non-static function in this file and takes
 * no min/max arguments, unlike the kernels above.
 *
 * It is compiled out when HAVE_ASM and TRIM_DSP_FUNCTIONS are both set and the
 * target is either AArch64, or ARM with __ARM_NEON, __APPLE__ or _WIN32
 * defined.
 */
#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  ARCH_AARCH64 || \
  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
))
void dav1d_inv_wht4_1d_c(int32_t *const c, const ptrdiff_t stride) { … }
#endif