// chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <immintrin.h>  // AVX2

#include "config/aom_dsp_rtcd.h"

#include "aom_ports/mem.h"

/* 512-entry uint8_t table declared through DECLARE_ALIGNED with an alignment
 * argument of 32.
 * NOTE(review): the initializer after `=` is missing in this view of the
 * file; `=;` does not compile. The table contents must be restored before
 * this translation unit can build. */
/* clang-format off */
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) =;
/* clang-format on */

// Helper macros for the sub-pixel variance code in this file.
// NOTE(review): every macro in this group has an empty replacement list in
// this view, so each one currently expands to nothing. The bodies appear to
// have been elided; what each macro is meant to do cannot be confirmed from
// here. Only the parameter lists below are reliable.

// Takes one argument (filter).
#define FILTER_SRC(filter)

// Takes two arguments (src_reg, reg).
#define MERGE_WITH_SRC(src_reg, reg)

// Object-like macro (no parameter list).
#define LOAD_SRC_DST

// Takes two arguments (src_reg, size_stride).
#define AVG_NEXT_SRC(src_reg, size_stride)

// Takes two arguments (src_reg, size_stride).
#define MERGE_NEXT_SRC(src_reg, size_stride)

// Object-like macro (no parameter list).
#define CALC_SUM_SSE_INSIDE_LOOP

// final calculation to sum and sse
#define CALC_SUM_AND_SSE

// Functions related to sub pixel variance width 16
// NOTE(review): every macro in this group has an empty replacement list in
// this view, so each one currently expands to nothing. The bodies appear to
// have been elided; only the parameter lists below are reliable.

// Takes two arguments (src_stride, dst_stride).
#define LOAD_SRC_DST_INSERT(src_stride, dst_stride)

// Takes two arguments (src_reg, size_stride).
#define AVG_NEXT_SRC_INSERT(src_reg, size_stride)

// Takes two arguments (src_reg, size_stride).
#define MERGE_NEXT_SRC_INSERT(src_reg, size_stride)

// Object-like macro (no parameter list).
#define LOAD_SRC_NEXT_BYTE_INSERT

// Object-like macro (no parameter list).
#define LOAD_DST_INSERT

// Takes one argument (filter).
#define LOAD_SRC_MERGE_128BIT(filter)

// Takes one argument (filter).
#define FILTER_SRC_128BIT(filter)

// TODO([email protected]): These variance functions are macro-fied so we
// don't have to manually optimize the individual for-loops. We could save some
// binary size by optimizing the loops more carefully without duplicating the
// code with a macro.
//
// MAKE_SUB_PIXEL_VAR_32XH(height, log2height) takes a block height and the
// base-2 logarithm of that height.
// NOTE(review): the replacement list is empty in this view of the file; the
// macro body appears to have been elided and needs to be restored.
#define MAKE_SUB_PIXEL_VAR_32XH(height, log2height)

// One instantiation per helper height. Every invocation must pass both
// arguments: invoking a two-parameter function-like macro with an empty
// argument list is an argument-count mismatch and fails to preprocess.
// Heights 64 and 32 are the hf values that the AOM_SUB_PIXEL_VAR_AVX2
// instantiations in this file are given (with wf == 32).
// NOTE(review): the third height (16) is assumed from the usual set of
// 32-wide block sizes -- confirm against the functions this file must export.
MAKE_SUB_PIXEL_VAR_32XH(64, 6)
MAKE_SUB_PIXEL_VAR_32XH(32, 5)
MAKE_SUB_PIXEL_VAR_32XH(16, 4)

// AOM_SUB_PIXEL_VAR_AVX2(w, h, wf, hf, wlog2, hlog2)
// In every instantiation below, wlog2 and hlog2 are the base-2 logarithms of
// w and h, and wf is 32.
// NOTE(review): w/h look like the block dimensions and wf/hf like the
// dimensions of the helper each block is processed with -- confirm once the
// macro body is visible. The replacement list is empty in this view of the
// file, so these invocations currently expand to nothing.
#define AOM_SUB_PIXEL_VAR_AVX2(w, h, wf, hf, wlog2, hlog2)

// Note: hf = AOMMIN(h, 64) to avoid overflow in helper by capping height.
AOM_SUB_PIXEL_VAR_AVX2(128, 128, 32, 64, 7, 7)
AOM_SUB_PIXEL_VAR_AVX2(128, 64, 32, 64, 7, 6)
AOM_SUB_PIXEL_VAR_AVX2(64, 128, 32, 64, 6, 7)
AOM_SUB_PIXEL_VAR_AVX2(64, 64, 32, 64, 6, 6)
AOM_SUB_PIXEL_VAR_AVX2(64, 32, 32, 32, 6, 5)

// MAKE_SUB_PIXEL_VAR_16XH(height, log2height)
// In every invocation below the second argument is the base-2 logarithm of
// the first.
// NOTE(review): the replacement list is empty in this view of the file, so
// these invocations currently expand to nothing; the body appears elided.
#define MAKE_SUB_PIXEL_VAR_16XH(height, log2height)

// Heights 32, 16 and 8 are instantiated unconditionally.
MAKE_SUB_PIXEL_VAR_16XH(32, 5)
MAKE_SUB_PIXEL_VAR_16XH(16, 4)
MAKE_SUB_PIXEL_VAR_16XH(8, 3)
#if !CONFIG_REALTIME_ONLY
// Heights 64 and 4 are instantiated only when CONFIG_REALTIME_ONLY is 0.
MAKE_SUB_PIXEL_VAR_16XH(64, 6)
MAKE_SUB_PIXEL_VAR_16XH(4, 2)
#endif

// MAKE_SUB_PIXEL_AVG_VAR_32XH(height, log2height) takes a block height and
// the base-2 logarithm of that height.
// NOTE(review): the replacement list is empty in this view of the file; the
// macro body appears to have been elided and needs to be restored.
#define MAKE_SUB_PIXEL_AVG_VAR_32XH(height, log2height)

// One instantiation per helper height. Every invocation must pass both
// arguments: invoking a two-parameter function-like macro with an empty
// argument list is an argument-count mismatch and fails to preprocess.
// Heights 64 and 32 are the hf values that the AOM_SUB_PIXEL_AVG_VAR_AVX2
// instantiations in this file are given (with wf == 32).
// NOTE(review): the third height (16) is assumed from the usual set of
// 32-wide block sizes -- confirm against the functions this file must export.
MAKE_SUB_PIXEL_AVG_VAR_32XH(64, 6)
MAKE_SUB_PIXEL_AVG_VAR_32XH(32, 5)
MAKE_SUB_PIXEL_AVG_VAR_32XH(16, 4)

// AOM_SUB_PIXEL_AVG_VAR_AVX2(w, h, wf, hf, wlog2, hlog2)
// In every instantiation below, wlog2 and hlog2 are the base-2 logarithms of
// w and h, and wf is 32.
// NOTE(review): w/h look like the block dimensions and wf/hf like the
// dimensions of the helper each block is processed with -- confirm once the
// macro body is visible. The replacement list is empty in this view of the
// file, so these invocations currently expand to nothing.
#define AOM_SUB_PIXEL_AVG_VAR_AVX2(w, h, wf, hf, wlog2, hlog2)

// Note: hf = AOMMIN(h, 64) to avoid overflow in helper by capping height.
AOM_SUB_PIXEL_AVG_VAR_AVX2(128, 128, 32, 64, 7, 7)
AOM_SUB_PIXEL_AVG_VAR_AVX2(128, 64, 32, 64, 7, 6)
AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 128, 32, 64, 6, 7)
AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 64, 32, 64, 6, 6)
AOM_SUB_PIXEL_AVG_VAR_AVX2(64, 32, 32, 32, 6, 5)