chromium/third_party/libaom/source/libaom/test/av1_convolve_test.cc

/*
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <cstddef>
#include <cstdint>
#include <ostream>
#include <set>
#include <vector>
#include "config/av1_rtcd.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "gtest/gtest.h"
#include "test/acm_random.h"

namespace {

// TODO(any): Remove the following INTERP_FILTERS_ALL define so that the 12-tap
// filter is tested once the 12-tap filter SIMD is done.
#undef INTERP_FILTERS_ALL
#define INTERP_FILTERS_ALL

// All single reference convolve tests are parameterized on block size,
// bit-depth, and function to test.
//
// Note that parameterizing on these variables (and not other parameters) is
// a conscious decision - Jenkins needs some degree of parallelization to run
// the tests within the time limit, but if the number of parameters increases
// too much, the gtest framework does not handle it well (increased overhead per
// test, huge amount of output to stdout, etc.).
//
// Also note that the test suites must be named with the architecture, e.g.,
// C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
// that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
// binaries) and will disable tests using a filter like
// --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
// testing infrastructure will not selectively filter them properly.
class BlockSize {};

// Block size / bit depth / test function used to parameterize the tests.
template <typename T>
class TestParam {};

template <typename T>
std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {}

// Generate the list of all block widths / heights that need to be tested,
// includes chroma and luma sizes, for the given bit-depths. The test
// function is the same for all generated parameters.
template <typename T>
std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
                                        T test_func) {}

template <typename T>
std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {}

template <typename T>
::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
    T test_func) {}

// Test the test-parameters generators work as expected.
class AV1ConvolveParametersTest : public ::testing::Test {};

TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {}

#if CONFIG_AV1_HIGHBITDEPTH
// Builds the test parameters for the high bit-depth case: the same test
// function paired with bit depths 10 and 12.
template <typename T>
std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
  const std::initializer_list<int> kHighBitDepths = { 10, 12 };
  return GetTestParams(kHighBitDepths, test_func);
}

// Wraps the high bit-depth parameter list in a gtest parameter generator so
// it can be handed directly to INSTANTIATE_TEST_SUITE_P.
template <typename T>
::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
    T test_func) {
  const std::vector<TestParam<T>> highbd_params =
      GetHighbdTestParams(test_func);
  return ::testing::ValuesIn(highbd_params);
}

// Sanity-checks the high bit-depth parameter generator: 54 entries in total,
// each carrying the requested function and a bit depth of 10 or 12, with the
// two bit depths equally represented.
TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
  const auto params = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
  ASSERT_EQ(54U, params.size());
  int count_10bit = 0;
  int count_12bit = 0;
  for (const auto &param : params) {
    const int bit_depth = param.BitDepth();
    ASSERT_TRUE(bit_depth == 10 || bit_depth == 12);
    const bool same_fn = av1_highbd_convolve_x_sr_c == param.TestFunction();
    ASSERT_TRUE(same_fn);
    // The assertion above leaves only two possibilities.
    if (bit_depth == 12) {
      ++count_12bit;
    } else {
      ++count_10bit;
    }
  }
  ASSERT_EQ(count_10bit, count_12bit);
}
#endif  // CONFIG_AV1_HIGHBITDEPTH

// AV1ConvolveTest is the base class that all convolve tests should derive from.
// It provides storage/methods for generating randomized buffers for both
// low bit-depth and high bit-depth, and setup/teardown methods for clearing
// system state. Implementors can get the bit-depth / block-size /
// test function by calling GetParam().
template <typename T>
class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {};

////////////////////////////////////////////////////////
// Single reference convolve-x functions (low bit-depth)
////////////////////////////////////////////////////////
convolve_x_func;

class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {};

TEST_P(AV1ConvolveXTest, RunTest) {}

TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
#endif

////////////////////////////////////////////////////////////////
// Single reference convolve-x IntraBC functions (low bit-depth)
////////////////////////////////////////////////////////////////

class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {};

TEST_P(AV1ConvolveXIntraBCTest, RunTest) {}

TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
                         BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
/////////////////////////////////////////////////////////
// Single reference convolve-x functions (high bit-depth)
/////////////////////////////////////////////////////////
// Pointer type for the high bit-depth convolve-x functions under test
// (av1_highbd_convolve_x_sr_c and the variants instantiated below).
using highbd_convolve_x_func = void (*)(
    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
    int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
    ConvolveParams *conv_params, int bd);

// Checks high bit-depth single-reference convolve-x functions against
// av1_highbd_convolve_x_sr_c and provides a timing comparison between the two.
class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
 public:
  // Compares the function under test with the C reference for sub-pixel
  // offsets 1..15 and each filter from EIGHTTAP_REGULAR through
  // INTERP_FILTERS_ALL.
  void RunTest() {
    // Do not test the no-op filter.
    for (int sub_x = 1; sub_x < 16; ++sub_x) {
      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
           ++filter) {
        InterpFilter f = static_cast<InterpFilter>(filter);
        TestConvolve(sub_x, f);
      }
    }
  }

  // Times the C reference and the function under test for each filter,
  // 10000 iterations apiece.
  void SpeedTest() {
    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
         ++filter) {
      InterpFilter f = static_cast<InterpFilter>(filter);
      TestConvolveSpeed(f, 10000);
    }
  }

 private:
  // Runs the C reference and the function under test on the same random input
  // and requires the width x height outputs to be equal.
  void TestConvolve(const int sub_x, const InterpFilter filter) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(filter, width);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
                               height, filter_params_x, sub_x, &conv_params1,
                               bit_depth);

    // A separate ConvolveParams instance is used for the function under test.
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
                              filter_params_x, sub_x, &conv_params2, bit_depth);
    AssertOutputBufferEq(reference, test, width, height);
  }

  // Prints the elapsed time of num_iters calls to the C reference, of
  // num_iters calls to the function under test, and the ratio of the two.
  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(filter, width);
    // Build the convolve parameters for the bit depth actually passed to the
    // kernels, matching TestConvolve().
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);

    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
                                 height, filter_params_x, 0, &conv_params1,
                                 bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    highbd_convolve_x_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);

    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_x, 0, &conv_params2, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }

// Timing comparison: delegates to the fixture's SpeedTest(). The DISABLED_
// prefix keeps gtest from running it unless disabled tests are requested.
TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_sve2));
#endif

/////////////////////////////////////////////////////////////////
// Single reference convolve-x IntraBC functions (high bit-depth)
/////////////////////////////////////////////////////////////////

// Checks high bit-depth convolve-x IntraBC functions against
// av1_highbd_convolve_x_sr_intrabc_c and provides a timing comparison.
class AV1ConvolveXHighbdIntraBCTest
    : public AV1ConvolveTest<highbd_convolve_x_func> {
 public:
  // Runs the C reference and the function under test on the same random input
  // with av1_intrabc_filter_params and requires equal outputs.
  void RunTest() {
    // IntraBC functions only operate for subpel_x_qn = 8.
    constexpr int kSubX = 8;
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    // Use a stride different from width to avoid potential storing errors that
    // would go undetected. The input buffer is filled using a padding of 12, so
    // the stride can be anywhere between width and width + 12.
    av1_highbd_convolve_x_sr_intrabc_c(
        input, width + 2, reference, kOutputStride, width, height,
        filter_params_x, kSubX, &conv_params1, bit_depth);

    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
                              height, filter_params_x, kSubX, &conv_params2,
                              bit_depth);

    AssertOutputBufferEq(reference, test, width, height);
  }

  // Prints the elapsed time of kNumIters calls to the C reference, of
  // kNumIters calls to the function under test, and the ratio of the two.
  void SpeedTest() {
    constexpr int kNumIters = 10000;
    // IntraBC functions only operate for subpel_x_qn = 8, so time them with
    // the same offset RunTest() uses.
    constexpr int kSubX = 8;
    // Only used to label the printed result line.
    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    // Build the convolve parameters for the bit depth actually passed to the
    // kernels, matching RunTest().
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
                                         width, height, filter_params_x, kSubX,
                                         &conv_params1, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    highbd_convolve_x_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_x, kSubX, &conv_params2, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); }

// Timing comparison: delegates to the fixture's SpeedTest(). The DISABLED_
// prefix keeps gtest from running it unless disabled tests are requested.
TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

////////////////////////////////////////////////////////
// Single reference convolve-y functions (low bit-depth)
////////////////////////////////////////////////////////
convolve_y_func;

class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {};

TEST_P(AV1ConvolveYTest, RunTest) {}

TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_i8mm));
#endif

////////////////////////////////////////////////////////////////
// Single reference convolve-y IntraBC functions (low bit-depth)
////////////////////////////////////////////////////////////////

class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {};

TEST_P(AV1ConvolveYIntraBCTest, RunTest) {}

TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
/////////////////////////////////////////////////////////
// Single reference convolve-y functions (high bit-depth)
/////////////////////////////////////////////////////////
// Pointer type for the high bit-depth convolve-y functions under test
// (av1_highbd_convolve_y_sr_c and the variants instantiated below).
using highbd_convolve_y_func = void (*)(
    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
    int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
    int bd);

// Checks high bit-depth single-reference convolve-y functions against
// av1_highbd_convolve_y_sr_c and provides a timing comparison between the two.
class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
 public:
  // Compares the function under test with the C reference for sub-pixel
  // offsets 1..15 and each filter from EIGHTTAP_REGULAR through
  // INTERP_FILTERS_ALL.
  void RunTest() {
    // Do not test the no-op filter.
    for (int sub_y = 1; sub_y < 16; ++sub_y) {
      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
           ++filter) {
        InterpFilter f = static_cast<InterpFilter>(filter);
        TestConvolve(sub_y, f);
      }
    }
  }

  // Times the C reference and the function under test for each filter,
  // 10000 iterations apiece.
  void SpeedTest() {
    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
         ++filter) {
      InterpFilter f = static_cast<InterpFilter>(filter);
      TestConvolveSpeed(f, 10000);
    }
  }

 private:
  // Runs the C reference and the function under test on the same random input
  // and requires the width x height outputs to be equal.
  void TestConvolve(const int sub_y, const InterpFilter filter) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(filter, height);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
                               height, filter_params_y, sub_y, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
                              filter_params_y, sub_y, bit_depth);
    AssertOutputBufferEq(reference, test, width, height);
  }

  // Prints the elapsed time of num_iters calls to the C reference, of
  // num_iters calls to the function under test, and the ratio of the two.
  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    // The vertical filter is selected by block height, as in TestConvolve().
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(filter, height);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);

    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
                                 height, filter_params_y, 0, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    highbd_convolve_y_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);

    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_y, 0, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1ConvolveYHighbdTest, RunTest) { RunTest(); }

// Timing comparison: delegates to the fixture's SpeedTest(). The DISABLED_
// prefix keeps gtest from running it unless disabled tests are requested.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif

/////////////////////////////////////////////////////////////////
// Single reference convolve-y IntraBC functions (high bit-depth)
/////////////////////////////////////////////////////////////////

// Checks high bit-depth convolve-y IntraBC functions against
// av1_highbd_convolve_y_sr_intrabc_c and provides a timing comparison.
class AV1ConvolveYHighbdIntraBCTest
    : public AV1ConvolveTest<highbd_convolve_y_func> {
 public:
  // Runs the C reference and the function under test on the same random input
  // with av1_intrabc_filter_params and requires equal outputs.
  void RunTest() {
    // IntraBC functions only operate for subpel_y_qn = 8.
    constexpr int kSubY = 8;
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    // Use a stride different from width to avoid potential storing errors that
    // would go undetected. The input buffer is filled using a padding of 12, so
    // the stride can be anywhere between width and width + 12.
    av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
                                       kOutputStride, width, height,
                                       filter_params_y, kSubY, bit_depth);

    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
                              height, filter_params_y, kSubY, bit_depth);

    AssertOutputBufferEq(reference, test, width, height);
  }

  // Prints the elapsed time of kNumIters calls to the C reference, of
  // kNumIters calls to the function under test, and the ratio of the two.
  void SpeedTest() {
    constexpr int kNumIters = 10000;
    // IntraBC functions only operate for subpel_y_qn = 8, so time them with
    // the same offset RunTest() uses.
    constexpr int kSubY = 8;
    // Only used to label the printed result line.
    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    // Time the kernels with the IntraBC filter, as RunTest() and the
    // convolve-x IntraBC fixture do.
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
                                         width, height, filter_params_y, kSubY,
                                         bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    highbd_convolve_y_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_y, kSubY, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) { RunTest(); }

// Timing comparison: delegates to the fixture's SpeedTest(). The DISABLED_
// prefix keeps gtest from running it unless disabled tests are requested.
TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

//////////////////////////////////////////////////////////////
// Single reference convolve-copy functions (low bit-depth)
//////////////////////////////////////////////////////////////
convolve_copy_func;

class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {};

// Note that even though these are AOM convolve functions, we are using the
// newer AV1 test framework.
TEST_P(AV1ConvolveCopyTest, RunTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
                         BuildLowbdParams(aom_convolve_copy_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
///////////////////////////////////////////////////////////////
// Single reference convolve-copy functions (high bit-depth)
///////////////////////////////////////////////////////////////
// Pointer type for the high bit-depth convolve-copy functions under test
// (aom_highbd_convolve_copy_c and the variants instantiated below).
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                           ptrdiff_t src_stride, uint16_t *dst,
                                           ptrdiff_t dst_stride, int w, int h);

// Checks high bit-depth convolve-copy functions against
// aom_highbd_convolve_copy_c.
class AV1ConvolveCopyHighbdTest
    : public AV1ConvolveTest<highbd_convolve_copy_func> {
 public:
  // Copies the same random block with the C reference and with the function
  // under test, then requires both outputs to be equal.
  void RunTest() {
    const auto &param = GetParam();
    const int w = param.Block().Width();
    const int h = param.Block().Height();
    const uint16_t *const src = FirstRandomInput16(param);

    DECLARE_ALIGNED(32, uint16_t, expected[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, uint16_t, actual[MAX_SB_SQUARE]);

    aom_highbd_convolve_copy_c(src, w, expected, kOutputStride, w, h);

    const highbd_convolve_copy_func copy_fn = param.TestFunction();
    copy_fn(src, w, actual, kOutputStride, w, h);

    AssertOutputBufferEq(expected, actual, w, h);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1ConvolveCopyHighbdTest, RunTest) { RunTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

/////////////////////////////////////////////////////////
// Single reference convolve-2D functions (low bit-depth)
/////////////////////////////////////////////////////////
convolve_2d_func;

class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {};

TEST_P(AV1Convolve2DTest, RunTest) {}

TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sve2));
#endif

/////////////////////////////////////////////////////////////////
// Single reference convolve-2D IntraBC functions (low bit-depth)
/////////////////////////////////////////////////////////////////

class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {};

TEST_P(AV1Convolve2DIntraBCTest, RunTest) {}

TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
//////////////////////////////////////////////////////////
// Single reference convolve-2d functions (high bit-depth)
//////////////////////////////////////////////////////////

// Pointer type for the high bit-depth convolve-2D functions under test
// (av1_highbd_convolve_2d_sr_c and the variants instantiated below).
using highbd_convolve_2d_func = void (*)(
    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
    int h, const InterpFilterParams *filter_params_x,
    const InterpFilterParams *filter_params_y, const int subpel_x_qn,
    const int subpel_y_qn, ConvolveParams *conv_params, int bd);

// Checks high bit-depth single-reference convolve-2D functions against
// av1_highbd_convolve_2d_sr_c and provides a timing comparison between the two.
class AV1Convolve2DHighbdTest
    : public AV1ConvolveTest<highbd_convolve_2d_func> {
 public:
  // Compares the function under test with the C reference for every pair of
  // sub-pixel offsets in 1..15 and every tested horizontal/vertical filter
  // pair.
  void RunTest() {
    // Do not test the no-op filter.
    for (int sub_x = 1; sub_x < 16; ++sub_x) {
      for (int sub_y = 1; sub_y < 16; ++sub_y) {
        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
            if (SkipFilterPair(h_f, v_f)) continue;
            TestConvolve(static_cast<InterpFilter>(h_f),
                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
          }
        }
      }
    }
  }

  // Times the C reference and the function under test for every tested
  // horizontal/vertical filter pair, 10000 iterations apiece.
  void SpeedTest() {
    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
        if (SkipFilterPair(h_f, v_f)) continue;
        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
                          static_cast<InterpFilter>(v_f), 10000);
      }
    }
  }

 private:
  // Returns true when exactly one of the two directions uses MULTITAP_SHARP2
  // and the other uses a filter that precedes it; such pairs are not tested.
  static bool SkipFilterPair(const int h_f, const int v_f) {
    return ((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
           ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2));
  }

  // Runs the C reference and the function under test on the same random input
  // and requires the width x height outputs to be equal.
  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
                    const int sub_x, const int sub_y) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
                                height, filter_params_x, filter_params_y, sub_x,
                                sub_y, &conv_params1, bit_depth);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    // A separate ConvolveParams instance is used for the function under test.
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
                              filter_params_x, filter_params_y, sub_x, sub_y,
                              &conv_params2, bit_depth);
    AssertOutputBufferEq(reference, test, width, height);
  }

  // Prints the elapsed time of num_iters calls to the C reference, of
  // num_iters calls to the function under test, and the ratio of the two.
  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
                         int num_iters) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    // Build the convolve parameters for the bit depth actually passed to the
    // kernels, matching TestConvolve().
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
                                  height, filter_params_x, filter_params_y, 0,
                                  0, &conv_params1, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      GetParam().TestFunction()(input, width, test, kOutputStride, width,
                                height, filter_params_x, filter_params_y, 0, 0,
                                &conv_params2, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
           time1, time2, time1 / time2);
  }
};

// Correctness check: delegates to the fixture's RunTest().
TEST_P(AV1Convolve2DHighbdTest, RunTest) { RunTest(); }

// Timing comparison: delegates to the fixture's SpeedTest(). The DISABLED_
// prefix keeps gtest from running it unless disabled tests are requested.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif

//////////////////////////////////////////////////////////////////
// Single reference convolve-2d IntraBC functions (high bit-depth)
//////////////////////////////////////////////////////////////////

// Tests the high bit-depth single-reference 2D IntraBC convolve functions by
// comparing the function under test against
// av1_highbd_convolve_2d_sr_intrabc_c.
class AV1Convolve2DHighbdIntraBCTest
    : public AV1ConvolveTest<highbd_convolve_2d_func> {
 public:
  // Runs the C reference and the function under test on the same random input
  // and asserts that the two output blocks are identical.
  void RunTest() {
    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
    constexpr int kSubX = 8;
    constexpr int kSubY = 8;
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    // Use a stride different from width to avoid potential storing errors that
    // would go undetected. The input buffer is filled using a padding of 12, so
    // the stride can be anywhere between width and width + 12.
    av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
                                        kOutputStride, width, height,
                                        filter_params_x, filter_params_y, kSubX,
                                        kSubY, &conv_params1, bit_depth);

    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
                              height, filter_params_x, filter_params_y, kSubX,
                              kSubY, &conv_params2, bit_depth);

    AssertOutputBufferEq(reference, test, width, height);
  }

  // Times kNumIters calls of the C reference and of the function under test,
  // then prints both timings and their ratio.
  void SpeedTest() {
    constexpr int kNumIters = 10000;
    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8,
    // so time the same sub-pixel offsets and filter parameters that RunTest()
    // verifies rather than an unsupported configuration.
    constexpr int kSubX = 8;
    constexpr int kSubY = 8;
    // Only used to label the output line, which keeps the same format as the
    // other speed tests in this file.
    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint16_t *input = FirstRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    // Build the convolve parameters for the bit depth under test, as RunTest()
    // does.
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      av1_highbd_convolve_2d_sr_intrabc_c(input, width, reference,
                                          kOutputStride, width, height,
                                          filter_params_x, filter_params_y,
                                          kSubX, kSubY, &conv_params1,
                                          bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    highbd_convolve_2d_func test_func = GetParam().TestFunction();
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_x, filter_params_y, kSubX, kSubY, &conv_params2,
                bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    // NOTE(review): the label says "ns" but the timer API is named "usec" --
    // confirm the unit before relying on absolute numbers.
    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
           time1, time2, time1 / time2);
  }
};

// Parameterized correctness test; the parameter sets come from the
// instantiations below.
TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the timing run out of normal test runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// With the C function as the test function, RunTest() compares the reference
// against itself.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

//////////////////////////
// Compound Convolve Tests
//////////////////////////

// The compound functions do not work for chroma block sizes. Provide
// a function to generate test parameters for just luma block sizes.
template <typename T>
std::vector<TestParam<T>> GetLumaTestParams(
    std::initializer_list<int> bit_depths, T test_func) {}

template <typename T>
std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {}

template <typename T>
::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
    T test_func) {}

TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {}

#if CONFIG_AV1_HIGHBITDEPTH
// Builds the luma-only test parameters for the high bit-depth cases, i.e. for
// bit depths 10 and 12, all bound to |test_func|.
template <typename T>
std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
  const std::initializer_list<int> high_bit_depths = { 10, 12 };
  return GetLumaTestParams(high_bit_depths, test_func);
}

// Sanity-checks the generated high bit-depth luma parameters: 44 entries in
// total, each carrying the requested function and a bit depth of 10 or 12,
// with both bit depths equally represented.
TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
  const auto params = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
  ASSERT_EQ(44U, params.size());
  int count_10bit = 0;
  int count_12bit = 0;
  for (const auto &param : params) {
    const auto depth = param.BitDepth();
    ASSERT_TRUE(10 == depth || 12 == depth);
    const bool same_fn =
        av1_highbd_dist_wtd_convolve_x_c == param.TestFunction();
    ASSERT_TRUE(same_fn);
    // The assertion above guarantees that anything other than 10 is 12.
    if (depth == 10) {
      ++count_10bit;
    } else {
      ++count_12bit;
    }
  }
  ASSERT_EQ(count_10bit, count_12bit);
}

// Wraps the high bit-depth luma parameters for |test_func| in a gtest
// parameter generator suitable for INSTANTIATE_TEST_SUITE_P.
template <typename T>
::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
    T test_func) {
  const std::vector<TestParam<T>> params = GetHighbdLumaTestParams(test_func);
  return ::testing::ValuesIn(params);
}

#endif  // CONFIG_AV1_HIGHBITDEPTH

// Compound cases also need to test different frame offsets and weightings.
class CompoundParam {};

std::vector<CompoundParam> GetCompoundParams() {}

TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {}

////////////////////////////////////////////////
// Compound convolve-x functions (low bit-depth)
////////////////////////////////////////////////

ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
                                 int width, int bit_depth,
                                 const CompoundParam &compound) {}

class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {};

TEST_P(AV1ConvolveXCompoundTest, RunTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

// Neon-family instantiations of the low bit-depth compound convolve-x test.
// Each one is compiled only when the matching HAVE_* flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
/////////////////////////////////////////////////
// Compound convolve-x functions (high bit-depth)
/////////////////////////////////////////////////
// Checks a high bit-depth compound 1D convolve function against a C reference.
// Derived fixtures can retarget the test by overriding FilterParams() and
// ReferenceFunc().
class AV1ConvolveXHighbdCompoundTest
    : public AV1ConvolveTest<highbd_convolve_x_func> {
 public:
  // Exercises every sub-pixel offset in [1, 15], every filter from
  // EIGHTTAP_REGULAR up to INTERP_FILTERS_ALL and every entry returned by
  // GetCompoundParams().
  void RunTest() {
    const auto compound_params = GetCompoundParams();
    // Start at 1: do not test the no-op filter.
    for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
      for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
        const InterpFilter filter = static_cast<InterpFilter>(f);
        for (const auto &compound : compound_params) {
          TestConvolve(sub_pix, filter, compound);
        }
      }
    }
  }

 protected:
  // Filter parameters for |f|; this fixture selects them by block width.
  virtual const InterpFilterParams *FilterParams(InterpFilter f,
                                                 const BlockSize &block) const {
    return av1_get_interp_filter_params_with_block_size(f, block.Width());
  }

  // The C implementation that the function under test must match.
  virtual highbd_convolve_x_func ReferenceFunc() const {
    return av1_highbd_dist_wtd_convolve_x_c;
  }

 private:
  // Runs the reference and the function under test on the same two inputs and
  // requires both the intermediate compound buffer and the final output to
  // match.
  void TestConvolve(const int sub_pix, const InterpFilter filter,
                    const CompoundParam &compound) {
    const uint16_t *first_input = FirstRandomInput16(GetParam());
    const uint16_t *second_input = SecondRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);

    Convolve(ReferenceFunc(), first_input, second_input, reference,
             reference_conv_buf, compound, sub_pix, filter);
    Convolve(GetParam().TestFunction(), first_input, second_input, test,
             test_conv_buf, compound, sub_pix, filter);

    const BlockSize &block = GetParam().Block();
    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, block.Width(),
                         block.Height());
    AssertOutputBufferEq(reference, test, block.Width(), block.Height());
  }

  // Calls |test_func| twice: first on |src1| with do_average = 0, then on
  // |src2| with do_average = 1, sharing |conv_buf| and |dst| between passes.
  void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
                const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
                const CompoundParam &compound, const int sub_pix,
                const InterpFilter filter) {
    const BlockSize &block = GetParam().Block();
    const int width = block.Width();
    const int height = block.Height();
    const int bit_depth = GetParam().BitDepth();
    const InterpFilterParams *filter_params = FilterParams(filter, block);

    const uint16_t *const sources[2] = { src1, src2 };
    for (int do_average = 0; do_average < 2; ++do_average) {
      ConvolveParams conv_params = GetConvolveParams(
          do_average, conv_buf, kOutputStride, bit_depth, compound);
      test_func(sources[do_average], width, dst, kOutputStride, width, height,
                filter_params, sub_pix, &conv_params, bit_depth);
    }
  }
};

// Parameterized correctness test; the parameter sets come from the
// instantiations below.
TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); }

// Instantiations are named after the instruction set they exercise. The SIMD
// variants are only compiled when the matching HAVE_* flag is set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

////////////////////////////////////////////////
// Compound convolve-y functions (low bit-depth)
////////////////////////////////////////////////

// Note that the X and Y convolve functions have the same type signature and
// logic; they differ only in the filter parameters and reference function.
class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {};

TEST_P(AV1ConvolveYCompoundTest, RunTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
// Neon variant of the low bit-depth compound convolve-y function.
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
/////////////////////////////////////////////////
// Compound convolve-y functions (high bit-depth)
/////////////////////////////////////////////////

// Again, the X and Y convolve functions have the same type signature and logic.
// Reuses the X compound fixture for the Y direction by swapping in the Y
// reference function and choosing filter parameters by block height.
class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
  // Filter parameters for the Y direction are chosen by block height.
  const InterpFilterParams *FilterParams(
      InterpFilter filter, const BlockSize &block_size) const override {
    const int height = block_size.Height();
    return av1_get_interp_filter_params_with_block_size(filter, height);
  }

  // C implementation that the function under test must match.
  highbd_convolve_x_func ReferenceFunc() const override {
    return av1_highbd_dist_wtd_convolve_y_c;
  }
};

// Parameterized correctness test; RunTest() is inherited from the X compound
// fixture.
TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); }

// Instantiations are named after the instruction set they exercise. The SIMD
// variants are only compiled when the matching HAVE_* flag is set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sve2));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

//////////////////////////////////////////////////////
// Compound convolve-2d-copy functions (low bit-depth)
//////////////////////////////////////////////////////
compound_conv_2d_copy_func;

class AV1Convolve2DCopyCompoundTest
    : public AV1ConvolveTest<compound_conv_2d_copy_func> {};

TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) {}
TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_NEON
// Neon variant of the low bit-depth compound convolve-2d-copy function.
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
///////////////////////////////////////////////////////
// Compound convolve-2d-copy functions (high bit-depth)
///////////////////////////////////////////////////////
// Signature shared by the high bit-depth compound 2D copy functions under
// test: source and destination pointers with their strides, the block
// dimensions, the convolve parameters and the bit depth.
using highbd_compound_conv_2d_copy_func = void (*)(const uint16_t *src,
                                                   int src_stride,
                                                   uint16_t *dst,
                                                   int dst_stride, int w,
                                                   int h,
                                                   ConvolveParams *conv_params,
                                                   int bd);

// Checks the high bit-depth compound 2D copy functions against
// av1_highbd_dist_wtd_convolve_2d_copy_c for every entry returned by
// GetCompoundParams().
class AV1Convolve2DCopyHighbdCompoundTest
    : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
 public:
  void RunTest() {
    for (const auto &compound : GetCompoundParams()) {
      TestConvolve(compound);
    }
  }

 private:
  // Runs the reference and the function under test on the same two inputs and
  // requires both the intermediate compound buffer and the final output to
  // match.
  void TestConvolve(const CompoundParam &compound) {
    const uint16_t *first_input = FirstRandomInput16(GetParam());
    const uint16_t *second_input = SecondRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);

    Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, first_input, second_input,
             reference, reference_conv_buf, compound);
    Convolve(GetParam().TestFunction(), first_input, second_input, test,
             test_conv_buf, compound);

    const BlockSize &block = GetParam().Block();
    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, block.Width(),
                         block.Height());
    AssertOutputBufferEq(reference, test, block.Width(), block.Height());
  }

  // Calls |test_func| twice: first on |src1| with do_average = 0, then on
  // |src2| with do_average = 1, sharing |conv_buf| and |dst| between passes.
  void Convolve(highbd_compound_conv_2d_copy_func test_func,
                const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
                CONV_BUF_TYPE *conv_buf, const CompoundParam &compound) {
    const BlockSize &block = GetParam().Block();
    const int width = block.Width();
    const int height = block.Height();
    const int bit_depth = GetParam().BitDepth();

    const uint16_t *const sources[2] = { src1, src2 };
    for (int do_average = 0; do_average < 2; ++do_average) {
      ConvolveParams conv_params = GetConvolveParams(
          do_average, conv_buf, kOutputStride, bit_depth, compound);
      test_func(sources[do_average], width, dst, kOutputStride, width, height,
                &conv_params, bit_depth);
    }
  }
};

// Parameterized correctness test; the parameter sets come from the
// instantiations below.
TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); }

// Instantiations are named after the instruction set they exercise. The SIMD
// variants are only compiled when the matching HAVE_* flag is set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

/////////////////////////////////////////////////
// Compound convolve-2d functions (low bit-depth)
/////////////////////////////////////////////////

class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {};

TEST_P(AV1Convolve2DCompoundTest, RunTest) {}

INSTANTIATE_TEST_SUITE_P();

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P();
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P();
#endif

// Neon-family instantiations of the low bit-depth compound convolve-2d test.
// Each one is compiled only when the matching HAVE_* flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
//////////////////////////////////////////////////
// Compound convolve-2d functions (high bit-depth)
//////////////////////////////////////////////////

// Checks the high bit-depth compound 2D convolve functions against
// av1_highbd_dist_wtd_convolve_2d_c.
class AV1Convolve2DHighbdCompoundTest
    : public AV1ConvolveTest<highbd_convolve_2d_func> {
 public:
  // Exercises every horizontal/vertical filter pair from EIGHTTAP_REGULAR up
  // to INTERP_FILTERS_ALL, every sub-pixel offset pair in [1, 15] x [1, 15]
  // and every entry returned by GetCompoundParams().
  void RunTest() {
    const auto compound_params = GetCompoundParams();
    for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
      const InterpFilter horiz_filter = static_cast<InterpFilter>(h_f);
      for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
        const InterpFilter vert_filter = static_cast<InterpFilter>(v_f);
        // Offsets start at 1: do not test the no-op filter.
        for (int sub_x = 1; sub_x < 16; ++sub_x) {
          for (int sub_y = 1; sub_y < 16; ++sub_y) {
            for (const auto &compound : compound_params) {
              TestConvolve(horiz_filter, vert_filter, sub_x, sub_y, compound);
            }
          }
        }
      }
    }
  }

 private:
  // Runs the reference and the function under test on the same two inputs and
  // requires both the intermediate compound buffer and the final output to
  // match.
  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
                    const int sub_x, const int sub_y,
                    const CompoundParam &compound) {
    const uint16_t *first_input = FirstRandomInput16(GetParam());
    const uint16_t *second_input = SecondRandomInput16(GetParam());

    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);

    Convolve(av1_highbd_dist_wtd_convolve_2d_c, first_input, second_input,
             reference, reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
    Convolve(GetParam().TestFunction(), first_input, second_input, test,
             test_conv_buf, compound, h_f, v_f, sub_x, sub_y);

    const BlockSize &block = GetParam().Block();
    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, block.Width(),
                         block.Height());
    AssertOutputBufferEq(reference, test, block.Width(), block.Height());
  }

  // Calls |test_func| twice: first on |src1| with do_average = 0, then on
  // |src2| with do_average = 1, sharing |conv_buf| and |dst| between passes.
  // The horizontal filter is selected by block width and the vertical filter
  // by block height.
  void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
                const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
                const CompoundParam &compound, const InterpFilter h_f,
                const InterpFilter v_f, const int sub_x, const int sub_y) {
    const BlockSize &block = GetParam().Block();
    const int width = block.Width();
    const int height = block.Height();
    const int bit_depth = GetParam().BitDepth();

    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);

    const uint16_t *const sources[2] = { src1, src2 };
    for (int do_average = 0; do_average < 2; ++do_average) {
      ConvolveParams conv_params = GetConvolveParams(
          do_average, conv_buf, kOutputStride, bit_depth, compound);
      test_func(sources[do_average], width, dst, kOutputStride, width, height,
                filter_params_x, filter_params_y, sub_x, sub_y, &conv_params,
                bit_depth);
    }
  }
};

// Parameterized correctness test; the parameter sets come from the
// instantiations below.
TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); }

// Instantiations are named after the instruction set they exercise. The SIMD
// variants are only compiled when the matching HAVE_* flag is set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sve2));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

}  // namespace