/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/compression/QuotientMultiSet.h>
#include <boost/sort/spreadsort/integer_sort.hpp>
#include <folly/Benchmark.h>
#include <folly/Format.h>
#include <folly/Random.h>
#include <folly/String.h>
#include <folly/compression/elias_fano/EliasFanoCoding.h>
#include <folly/container/Enumerate.h>
#include <folly/container/F14Set.h>
#include <folly/container/Foreach.h>
#include <folly/experimental/test/CodingTestUtils.h>
#include <folly/init/Init.h>
DEFINE_int64(
key_bits,
32,
"The number of key bits used in quotient multiset. Should always <= 64");
DEFINE_uint64(
num_elements,
100000000,
"The number of elements inserted into quotient multiset");
DEFINE_double(load_factor, 0.95, "Load factor of the multiset");
#if FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
using std::string;
namespace {
static const unsigned int kRunsPerIteration = 5000000;
std::mt19937 rng;
// Uniformly distributed keys.
std::vector<uint64_t> uniform;
string qmsData;
uint64_t maxValue(uint32_t nbits) {
return nbits == 64 ? std::numeric_limits<uint64_t>::max()
: (uint64_t(1) << nbits) - 1;
}
// Hash 64 bits into 1.
uint64_t mix64_to_1(uint64_t word) {
// Constant from MurmurHash3 final mixer.
return (word * UINT64_C(0xff51afd7ed558ccd)) >> 63;
}
void buildQuotientMultiSet(std::vector<uint64_t>& keys) {
folly::QuotientMultiSetBuilder builder(
FLAGS_key_bits, keys.size(), FLAGS_load_factor);
folly::IOBufQueue buff;
for (const auto& iter : folly::enumerate(keys)) {
if (builder.insert(*iter)) {
builder.setBlockPayload(iter.index);
}
if (builder.numReadyBlocks() >= 1) {
builder.flush(buff);
}
}
builder.close(buff);
qmsData = buff.move()->to<string>();
}
std::vector<uint64_t> makeLookupKeys(size_t n, double hitRate) {
folly::BenchmarkSuspender guard;
std::vector<uint64_t> keys;
keys.reserve(n);
uint64_t maxKey = maxValue(FLAGS_key_bits);
for (uint64_t idx = 0; idx < n; idx++) {
keys.push_back(
(folly::Random::randDouble01(rng) < hitRate)
? uniform[folly::Random::rand64(uniform.size(), rng)]
: folly::Random::rand64(rng) & maxKey);
}
return keys;
}
const folly::F14FastSet<uint64_t>& getF14Baseline() {
folly::BenchmarkSuspender guard;
static const auto set = [] {
folly::F14FastSet<uint64_t> ret(uniform.begin(), uniform.end());
LOG(INFO) << folly::sformat(
"Built F14FastSet, size: {}, space: {}",
ret.size(),
folly::prettyPrint(
ret.getAllocatedMemorySize(), folly::PrettyType::PRETTY_BYTES_IEC));
return ret;
}();
return set;
}
using EFEncoder =
folly::compression::EliasFanoEncoder<uint64_t, uint64_t, 128, 128>;
const folly::compression::MutableEliasFanoCompressedList& getEFBaseline() {
folly::BenchmarkSuspender guard;
static auto list = [] {
auto ret = EFEncoder::encode(uniform.begin(), uniform.end());
LOG(INFO) << folly::sformat(
"Built Elias-Fano list, space: {}",
folly::prettyPrint(
ret.data.size(), folly::PrettyType::PRETTY_BYTES_IEC));
return ret;
}();
return list;
}
template <class Lookup>
size_t benchmarkHits(const Lookup& lookup) {
auto keys = makeLookupKeys(kRunsPerIteration, /* hitRate */ 1);
for (const auto& key : keys) {
auto found = lookup(key);
folly::doNotOptimizeAway(found);
DCHECK(found);
}
return kRunsPerIteration;
}
template <class Lookup>
size_t benchmarkRandom(const Lookup& lookup) {
auto keys = makeLookupKeys(kRunsPerIteration, /* hitRate */ 0);
for (const auto& key : keys) {
auto found = lookup(key);
folly::doNotOptimizeAway(found);
}
return kRunsPerIteration;
}
template <class Lookup>
size_t benchmarkMixtureSerialized(const Lookup& lookup) {
// Make the result unpredictable so we can use it to introduce a
// serializing dependency in the loop.
auto keys = makeLookupKeys(kRunsPerIteration * 2, /* hitRate */ 0.5);
size_t unpredictableBit = 0;
for (size_t i = 0; i < kRunsPerIteration * 2; i += 2) {
unpredictableBit = lookup(keys[i + unpredictableBit]) ? 1 : 0;
folly::doNotOptimizeAway(unpredictableBit);
}
return kRunsPerIteration;
}
template <class Lookup>
size_t benchmarkSerializedOnResultBits(const Lookup& lookup, double hitRate) {
auto keys = makeLookupKeys(kRunsPerIteration * 2, hitRate);
size_t unpredictableBit = 0;
for (size_t i = 0; i < kRunsPerIteration * 2; i += 2) {
// When all keys are hits we can use a hash of the location of the
// found key to introduce a serializing dependency in the loop.
unpredictableBit = mix64_to_1(lookup(keys[i + unpredictableBit]));
folly::doNotOptimizeAway(unpredictableBit);
}
return kRunsPerIteration;
}
template <class Lookup>
size_t benchmarkHitsSerialized(const Lookup& lookup) {
return benchmarkSerializedOnResultBits(lookup, /* hitRate */ 1);
}
template <class Lookup>
size_t benchmarkRandomSerialized(const Lookup& lookup) {
return benchmarkSerializedOnResultBits(lookup, /* hitRate */ 0);
}
template <class Lookup>
size_t benchmarkHitsSmallWorkingSet(const Lookup& lookup) {
// Loop over a small set of keys so that after the first iteration
// all the relevant parts of the data structure are in LLC cache.
constexpr size_t kLookupSetSize = 1 << 12; // Should be a power of 2.
auto keys = makeLookupKeys(kLookupSetSize, /* hitRate */ 1);
for (size_t i = 0; i < kRunsPerIteration; ++i) {
auto found = lookup(keys[i % kLookupSetSize]);
folly::doNotOptimizeAway(found);
DCHECK(found);
}
return kRunsPerIteration;
}
} // namespace
void benchmarkSetup() {
rng.seed(UINT64_C(12345678));
uint64_t maxKey = maxValue(FLAGS_key_bits);
// Uniformly distributed keys.
const uint64_t size = FLAGS_num_elements;
for (uint64_t idx = 0; idx < size; idx++) {
uint64_t key = folly::Random::rand64(rng) & maxKey;
uniform.emplace_back(key);
}
boost::sort::spreadsort::integer_sort(uniform.begin(), uniform.end());
buildQuotientMultiSet(uniform);
LOG(INFO) << folly::sformat(
"Built QuotientMultiSet, space: {}",
folly::prettyPrint(qmsData.size(), folly::PrettyType::PRETTY_BYTES_IEC));
}
BENCHMARK_MULTI(QuotientMultiSetGetHits) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkHits([&](uint64_t key) { return reader.equalRange(key); });
});
return ret;
}
BENCHMARK_MULTI(QuotientMultiSetGetRandom) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkRandom([&](uint64_t key) { return reader.equalRange(key); });
});
return ret;
}
BENCHMARK_MULTI(QuotientMultiSetGetHitsSmallWorkingSet) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkHitsSmallWorkingSet(
[&](uint64_t key) { return reader.equalRange(key); });
});
return ret;
}
BENCHMARK_MULTI(QuotientMultiSetGetMixtureSerialized) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkMixtureSerialized(
[&](uint64_t key) { return reader.equalRange(key); });
});
return ret;
}
BENCHMARK_MULTI(QuotientMultiSetGetHitsSerialized) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkHitsSerialized(
[&](uint64_t key) { return reader.equalRange(key).begin; });
});
return ret;
}
BENCHMARK_MULTI(QuotientMultiSetGetRandomSerialized) {
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = folly::QuotientMultiSet<decltype(instructions)>(qmsData);
ret = benchmarkRandomSerialized([&](uint64_t key) {
// Even without a match, begin will depend on the whole
// computation.
return reader.equalRange(key).begin;
});
});
return ret;
}
BENCHMARK_DRAW_LINE();
BENCHMARK_MULTI(F14MapHits) {
const auto& baseline = getF14Baseline();
return benchmarkHits(
[&](uint64_t key) { return baseline.find(key) != baseline.end(); });
}
BENCHMARK_MULTI(F14MapRandom) {
const auto& baseline = getF14Baseline();
return benchmarkRandom(
[&](uint64_t key) { return baseline.find(key) != baseline.end(); });
}
BENCHMARK_MULTI(F14MapHitsSmallWorkingSet) {
const auto& baseline = getF14Baseline();
return benchmarkHitsSmallWorkingSet(
[&](uint64_t key) { return baseline.find(key) != baseline.end(); });
}
BENCHMARK_MULTI(F14MapMixtureSerialized) {
const auto& baseline = getF14Baseline();
return benchmarkMixtureSerialized(
[&](uint64_t key) { return baseline.find(key) != baseline.end(); });
}
BENCHMARK_MULTI(F14MapHitsSerialized) {
const auto& baseline = getF14Baseline();
return benchmarkHitsSerialized([&](uint64_t key) {
return reinterpret_cast<uintptr_t>(&*baseline.find(key));
});
}
// benchmarkRandomSerialized() cannot be used with F14.
BENCHMARK_DRAW_LINE();
using folly::compression::EliasFanoReader;
BENCHMARK_MULTI(EliasFanoGetHits) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkHits([&](uint64_t key) {
return reader.jumpTo(key) && reader.value() == key;
});
});
return ret;
}
BENCHMARK_MULTI(EliasFanoGetRandom) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkRandom([&](uint64_t key) {
return reader.jumpTo(key) && reader.value() == key;
});
});
return ret;
}
BENCHMARK_MULTI(EliasFanoGetHitsSmallWorkingSet) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkHitsSmallWorkingSet([&](uint64_t key) {
return reader.jumpTo(key) && reader.value() == key;
});
});
return ret;
}
BENCHMARK_MULTI(EliasFanoGetMixtureSerialized) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkMixtureSerialized([&](uint64_t key) {
return reader.jumpTo(key) && reader.value() == key;
});
});
return ret;
}
BENCHMARK_MULTI(EliasFanoGetHitsSerialized) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkHitsSerialized([&](uint64_t key) {
reader.jumpTo(key);
DCHECK(reader.valid());
return reader.position();
});
});
return ret;
}
BENCHMARK_MULTI(EliasFanoGetRandomSerialized) {
auto list = getEFBaseline();
size_t ret = 0;
folly::compression::dispatchInstructions([&](auto instructions) {
auto reader = EliasFanoReader<EFEncoder, decltype(instructions)>(list);
ret = benchmarkRandomSerialized([&](uint64_t key) {
reader.jumpTo(key);
return reader.position();
});
});
return ret;
}
#endif // FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
int main(int argc, char** argv) {
folly::Init init(&argc, &argv);
#if FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
benchmarkSetup();
folly::runBenchmarks();
#endif // FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
}
#if 0
// Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz, built with Clang.
$ buck run @mode/opt folly/experimental/test:quotient_multiset_benchmark -- --benchmark --bm_max_secs 3
I0917 10:28:54.497815 2874547 QuotientMultiSetBenchmark.cpp:195] Built QuotientMultiSet, space: 124.9 MiB
============================================================================
folly/experimental/test/QuotientMultiSetBenchmark.cpprelative time/iter iters/s
============================================================================
QuotientMultiSetGetHits 114.94ns 8.70M
QuotientMultiSetGetRandom 72.87ns 13.72M
QuotientMultiSetGetHitsSmallWorkingSet 58.75ns 17.02M
QuotientMultiSetGetMixtureSerialized 138.73ns 7.21M
QuotientMultiSetGetHitsSerialized 138.56ns 7.22M
QuotientMultiSetGetRandomSerialized 130.11ns 7.69M
----------------------------------------------------------------------------
I0917 10:29:33.808831 2874547 QuotientMultiSetBenchmark.cpp:83] Built F14FastSet, size: 98843868, space: 1 GiB
F14MapHits 34.69ns 28.83M
F14MapRandom 47.22ns 21.18M
F14MapHitsSmallWorkingSet 14.42ns 69.34M
F14MapMixtureSerialized 94.64ns 10.57M
F14MapHitsSerialized 140.74ns 7.11M
----------------------------------------------------------------------------
I0917 10:29:52.722596 2874547 QuotientMultiSetBenchmark.cpp:100] Built Elias-Fano list, space: 101.5 MiB
EliasFanoGetHits 258.08ns 3.87M
EliasFanoGetRandom 247.18ns 4.05M
EliasFanoGetHitsSmallWorkingSet 81.54ns 12.26M
EliasFanoGetMixtureSerialized 253.63ns 3.94M
EliasFanoGetHitsSerialized 161.57ns 6.19M
EliasFanoGetRandomSerialized 161.55ns 6.19M
============================================================================
#endif