folly/folly/stats/test/DigestBuilderBenchmark.cpp

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/stats/DigestBuilder.h>

#include <chrono>
#include <condition_variable>
#include <memory>
#include <thread>
#include <vector>

#include <boost/thread/barrier.hpp>

#include <folly/Benchmark.h>
#include <folly/Range.h>
#include <folly/lang/Keep.h>
#include <folly/portability/GFlags.h>

// Duration, in nanoseconds, that FreeDigest::merge() busy-waits on each call.
// Tunable from the command line so the simulated merge cost can be varied.
DEFINE_int32(digest_merge_time_ns, 5500, "Time to merge into the digest");

using namespace folly;

/**
 * Minimal digest used as the DigestBuilder template argument in this
 * benchmark.
 *
 * It stores no sample data: merge() only burns a configurable amount of wall
 * time and tracks whether anything has ever been merged in.
 */
class FreeDigest {
 public:
  // The size argument is accepted and ignored.
  explicit FreeDigest(size_t = 0) {}

  /**
   * Spins until FLAGS_digest_merge_time_ns nanoseconds have elapsed on the
   * steady clock, then returns a new digest that is empty only when both this
   * digest and `values` are empty.
   */
  FreeDigest merge(Range<const double*> values) const {
    using Clock = std::chrono::steady_clock;

    const auto deadline =
        Clock::now() + std::chrono::nanoseconds{FLAGS_digest_merge_time_ns};
    for (;;) {
      if (!(Clock::now() < deadline)) {
        break;
      }
    }

    FreeDigest merged;
    if (empty_) {
      // Nothing was held before, so emptiness depends solely on the input.
      merged.empty_ = values.empty();
    } else {
      merged.empty_ = false;
    }
    return merged;
  }

  // True while no non-empty range has been merged into this digest.
  bool empty() const { return empty_; }

 private:
  bool empty_ = true;
};

// Free function with C linkage that forwards a single value to
// DigestBuilder<FreeDigest>::append(). Nothing else in this file calls it.
// NOTE(review): FOLLY_KEEP presumably keeps the symbol in the binary so the
// code generated for append() can be inspected -- confirm against
// folly/lang/Keep.h.
extern "C" FOLLY_KEEP void check_folly_digest_builder_append(
    DigestBuilder<FreeDigest>& builder, double value) {
  builder.append(value);
}

/**
 * Benchmark body: `nThreads` worker threads concurrently append values to one
 * shared DigestBuilder<FreeDigest> constructed with (bufSize, 100).
 *
 * The iteration count passed in by the caller is discarded; each worker always
 * performs 1,000,000 appends, and that fixed count is the return value.
 *
 * Thread creation, the start rendezvous, and joining are wrapped in
 * BENCHMARK_SUSPEND blocks; the second rendezvous (workers finished) is not.
 */
unsigned int append(unsigned int iters, size_t bufSize, size_t nThreads) {
  // Override whatever the caller asked for with a fixed per-thread workload.
  iters = 1000000;
  auto builder = std::make_shared<DigestBuilder<FreeDigest>>(bufSize, 100);

  // Every worker plus the calling thread takes part in each rendezvous.
  auto gate = std::make_shared<boost::barrier>(nThreads + 1);

  std::vector<std::thread> workers;
  workers.reserve(nThreads);
  BENCHMARK_SUSPEND {
    const auto work = [&] {
      gate->wait(); // Start line: wait until all workers exist.
      size_t value = 0;
      while (value < iters) {
        builder->append(value);
        ++value;
      }
      gate->wait(); // Finish line: report completion to the caller.
    };
    while (workers.size() < nThreads) {
      workers.emplace_back(work);
    }
    // Release the workers.
    gate->wait();
  }
  // Block until every worker has finished its appends.
  gate->wait();

  BENCHMARK_SUSPEND {
    for (auto& worker : workers) {
      worker.join();
    }
  }
  return iters;
}

// Benchmark registrations for append(). Each name reads <bufSize>x<nThreads>,
// and the two trailing macro arguments are the bufSize and nThreads values
// passed to append(). The RELATIVE variants are reported against the
// preceding non-relative entry (the single-threaded case of each group).
BENCHMARK_NAMED_PARAM_MULTI(append, 1000x1, 1000, 1)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 1000x2, 1000, 2)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 1000x4, 1000, 4)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 1000x8, 1000, 8)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 1000x16, 1000, 16)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 1000x32, 1000, 32)
BENCHMARK_DRAW_LINE();
BENCHMARK_NAMED_PARAM_MULTI(append, 10000x1, 10000, 1)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x2, 10000, 2)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x4, 10000, 4)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x8, 10000, 8)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x16, 10000, 16)
BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(append, 10000x32, 10000, 32)

/*
 * ./digest_buffer_benchmark
 * ============================================================================
 * folly/stats/test/DigestBuilderBenchmark.cpp     relative  time/iter  iters/s
 * ============================================================================
 * append(1000x1)                                              16.32ns   61.26M
 * append(1000x2)                                    96.42%    16.93ns   59.07M
 * append(1000x4)                                    93.57%    17.44ns   57.33M
 * append(1000x8)                                    93.43%    17.47ns   57.24M
 * append(1000x16)                                   92.96%    17.56ns   56.95M
 * append(1000x32)                                   29.23%    55.84ns   17.91M
 * ----------------------------------------------------------------------------
 * append(10000x1)                                             11.58ns   86.33M
 * append(10000x2)                                   95.82%    12.09ns   82.72M
 * append(10000x4)                                   89.00%    13.01ns   76.84M
 * append(10000x8)                                   88.63%    13.07ns   76.51M
 * append(10000x16)                                  88.22%    13.13ns   76.16M
 * append(10000x32)                                  24.89%    46.54ns   21.49M
 * ============================================================================
 */

// Entry point: parse command-line flags (including --digest_merge_time_ns
// defined in this file), then run every registered benchmark.
int main(int argc, char* argv[]) {
  // Flags must be parsed before the benchmarks run so that overrides are
  // visible to them. The final `true` asks gflags to strip the flags it
  // recognized from argc/argv.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  folly::runBenchmarks();
  return 0;
}