itx_1d.c | Explore in Territory

/*
 * Copyright © 2018-2019, VideoLAN and dav1d authors
 * Copyright © 2018-2019, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <stddef.h>
#include <stdint.h>

#include "common/intops.h"

#include "src/itx_1d.h"

#define CLIP(a) …

/*
 * In some places, we use the pattern like this:
 * t2 = ((in1 *  1567         - in3 * (3784 - 4096) + 2048) >> 12) - in3;
 * even though the reference code might use something like:
 * t2 =  (in1 *  1567         - in3 *  3784         + 2048) >> 12;
 *
 * The reason for this is that for 12 bits/component bitstreams (corrupt/
 * invalid ones, but they are codable nonetheless), each coefficient or
 * input can be 19(+sign) bits, and therefore if the combination of the
 * two multipliers (each 12 bits) is >= 4096, the result of the add/sub
 * after the pair of multiplies will exceed the 31+sign bit range. Signed
 * integer overflows are UB in C, and we'd like to prevent that.
 *
 * To workaround this, we invert one of the two coefficients (or, if both are
 * multiples of 2, we reduce their magnitude by one bit). It should be noted
 * that SIMD implementations do not have to follow this exact behaviour. The
 * AV1 spec clearly states that the result of the multiply/add pairs should
 * fit in 31+sign bit intermediates, and that streams violating this convention
 * are not AV1-compliant. So, as long as we don't trigger UB (which some people
 * would consider a security vulnerability), we're fine. So, SIMD can simply
 * use the faster implementation, even if that might in some cases result in
 * integer overflows, since these are not considered valid AV1 anyway, and in
 * e.g. x86 assembly, integer overflows are not considered UB, but they merely
 * wrap around.
 */

static NOINLINE void
inv_dct4_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
                       const int min, const int max, const int tx64)
{ … }

static void inv_dct4_1d_c(int32_t *const c, const ptrdiff_t stride,
                          const int min, const int max)
{ … }

static NOINLINE void
inv_dct8_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
                       const int min, const int max, const int tx64)
{ … }

static void inv_dct8_1d_c(int32_t *const c, const ptrdiff_t stride,
                          const int min, const int max)
{ … }

static NOINLINE void
inv_dct16_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
                        const int min, const int max, int tx64)
{ … }

static void inv_dct16_1d_c(int32_t *const c, const ptrdiff_t stride,
                           const int min, const int max)
{ … }

static NOINLINE void
inv_dct32_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
                        const int min, const int max, const int tx64)
{ … }

static void inv_dct32_1d_c(int32_t *const c, const ptrdiff_t stride,
                           const int min, const int max)
{ … }

static void inv_dct64_1d_c(int32_t *const c, const ptrdiff_t stride,
                           const int min, const int max)
{ … }

static NOINLINE void
inv_adst4_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s,
                        const int min, const int max,
                        int32_t *const out, const ptrdiff_t out_s)
{ … }

static NOINLINE void
inv_adst8_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s,
                        const int min, const int max,
                        int32_t *const out, const ptrdiff_t out_s)
{ … }

static NOINLINE void
inv_adst16_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s,
                         const int min, const int max,
                         int32_t *const out, const ptrdiff_t out_s)
{ … }

#define inv_adst_1d …

inv_adst_1d
inv_adst_1d
inv_adst_1d

#undef inv_adst_1d

static void inv_identity4_1d_c(int32_t *const c, const ptrdiff_t stride,
                               const int min, const int max)
{ … }

static void inv_identity8_1d_c(int32_t *const c, const ptrdiff_t stride,
                               const int min, const int max)
{ … }

static void inv_identity16_1d_c(int32_t *const c, const ptrdiff_t stride,
                                const int min, const int max)
{ … }

static void inv_identity32_1d_c(int32_t *const c, const ptrdiff_t stride,
                                const int min, const int max)
{ … }

const itx_1d_fn dav1d_tx1d_fns[N_TX_SIZES][N_TX_1D_TYPES] = …;

const uint8_t /* enum Tx1dType */ dav1d_tx1d_types[N_TX_TYPES][2] = …;

#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
  ARCH_AARCH64 || \
  (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \
))
void dav1d_inv_wht4_1d_c(int32_t *const c, const ptrdiff_t stride) { … }
#endif
chromium/third_party/dav1d/libdav1d/src/itx_1d.c