/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/compression/QuotientMultiSet.h>
#include <random>
#include <folly/Format.h>
#include <folly/Random.h>
#include <folly/container/Enumerate.h>
#include <folly/io/IOBufQueue.h>
#include <folly/portability/GTest.h>
#if FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
namespace {
class QuotientMultiSetTest : public ::testing::Test {
protected:
static constexpr uint64_t kBlockSize = folly::QuotientMultiSet<>::kBlockSize;
void SetUp() override { rng.seed(folly::randomNumberSeed()); }
void buildAndValidate(
std::vector<uint64_t>& keys, uint64_t keyBits, double loadFactor) {
// Elements must be added in ascending order.
std::sort(keys.begin(), keys.end());
folly::QuotientMultiSetBuilder builder(keyBits, keys.size(), loadFactor);
folly::IOBufQueue buff;
for (const auto& iter : folly::enumerate(keys)) {
if (builder.insert(*iter)) {
// Set payload to relative position of the first key in block.
builder.setBlockPayload(iter.index);
}
if (builder.numReadyBlocks() >= 1) {
builder.flush(buff);
}
}
builder.close(buff);
auto spBuf = buff.move();
folly::StringPiece data(spBuf->coalesce());
folly::QuotientMultiSet<> reader(data);
size_t index = 0;
folly::QuotientMultiSet<>::Iterator iter(&reader);
while (index < keys.size()) {
uint64_t start = index;
uint64_t& key = keys[start];
auto debugInfo = [&] {
return folly::sformat("Index: {} Key: {}", index, key);
};
while (index < keys.size() && keys[index] == key) {
index++;
}
auto range = reader.equalRange(key);
size_t pos = reader.getBlockPayload(range.begin / kBlockSize);
EXPECT_EQ(start, pos + range.begin % kBlockSize) << debugInfo();
pos = reader.getBlockPayload((range.end - 1) / kBlockSize);
EXPECT_EQ(index - 1, pos + (range.end - 1) % kBlockSize) << debugInfo();
EXPECT_TRUE(iter.skipTo(key));
EXPECT_EQ(key, iter.key()) << debugInfo();
EXPECT_EQ(range.begin, iter.pos()) << debugInfo();
EXPECT_EQ(
start,
reader.getBlockPayload(iter.pos() / kBlockSize) +
iter.pos() % kBlockSize)
<< debugInfo();
if (start < keys.size() - 1) {
EXPECT_TRUE(iter.next());
EXPECT_EQ(keys[start + 1], iter.key());
}
// Verify getting keys not in keys returns false.
uint64_t prevKey = (start == 0 ? 0 : keys[start - 1]);
if (prevKey + 1 < key) {
uint64_t missedKey = folly::Random::rand64(prevKey + 1, key, rng);
EXPECT_FALSE(reader.equalRange(missedKey)) << key;
iter.skipTo(missedKey);
EXPECT_EQ(key, iter.key()) << debugInfo();
EXPECT_EQ(range.begin, iter.pos()) << debugInfo();
EXPECT_EQ(
start,
reader.getBlockPayload(iter.pos() / kBlockSize) +
iter.pos() % kBlockSize)
<< debugInfo();
if (start < keys.size() - 1) {
EXPECT_TRUE(iter.next());
EXPECT_EQ(keys[start + 1], iter.key());
}
}
}
const auto maxKey = folly::qms_detail::maxValue(keyBits);
if (keys.back() < maxKey) {
uint64_t key = folly::Random::rand64(keys.back(), maxKey, rng) + 1;
EXPECT_FALSE(reader.equalRange(key)) << keys.back() << " " << key;
EXPECT_FALSE(iter.done());
EXPECT_FALSE(iter.skipTo(key));
EXPECT_TRUE(iter.done());
}
folly::QuotientMultiSet<>::Iterator nextIter(&reader);
for (const auto key : keys) {
EXPECT_TRUE(nextIter.next());
EXPECT_EQ(key, nextIter.key());
}
EXPECT_FALSE(nextIter.next());
EXPECT_TRUE(nextIter.done());
if (maxKey + 1 != 0) {
folly::QuotientMultiSet<>::Iterator skipToEndIter(&reader);
EXPECT_FALSE(skipToEndIter.done());
EXPECT_FALSE(skipToEndIter.skipTo(maxKey + 1));
EXPECT_TRUE(skipToEndIter.done());
}
}
std::mt19937 rng;
};
} // namespace
TEST_F(QuotientMultiSetTest, Simple) {
std::vector<uint64_t> keys = {
100, 1000, 1 << 14, 10 << 14, 0, 10, 100 << 14, 1000 << 14};
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, Empty) {
folly::QuotientMultiSetBuilder builder(32, 1024);
folly::IOBufQueue buff;
builder.close(buff);
auto spBuf = buff.move();
folly::StringPiece data(spBuf->coalesce());
folly::QuotientMultiSet<> reader(data);
for (size_t idx = 0; idx < 1024; idx++) {
uint64_t key = folly::Random::rand32(rng);
EXPECT_FALSE(reader.equalRange(key));
}
}
TEST_F(QuotientMultiSetTest, ZeroKeyBits) {
std::vector<uint64_t> keys(67, 0);
buildAndValidate(keys, 0, 0.95);
}
TEST_F(QuotientMultiSetTest, Uniform) {
constexpr auto kLoadFactor =
folly::QuotientMultiSetBuilder::kDefaultMaxLoadFactor;
constexpr uint64_t kAvgSize = 1 << 16;
auto randSize = [&](uint64_t avgSize) {
return folly::Random::rand64(avgSize / 2, avgSize * 3 / 2, rng);
};
std::vector<std::tuple<int, uint64_t, double>> testCases = {
{1, randSize(1 << 9), kLoadFactor},
{8, randSize(1 << 10), kLoadFactor},
{9, randSize(1 << 11), kLoadFactor},
{12, randSize(kAvgSize), kLoadFactor},
{32, randSize(kAvgSize), kLoadFactor},
{48, randSize(kAvgSize), kLoadFactor},
{64, randSize(kAvgSize), kLoadFactor},
{32, randSize(kAvgSize), 1}, // Full
{12, 3800, kLoadFactor}, // Almost full
{64, randSize(16), kLoadFactor}, // Sparse, long keys.
};
for (const auto& testCase : testCases) {
const auto& [keyBits, size, loadFactor] = testCase;
SCOPED_TRACE(folly::sformat(
"Key bits: {} Size: {} Load factor: {}", keyBits, size, loadFactor));
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < size; idx++) {
keys.emplace_back(
folly::Random::rand64(rng) & folly::qms_detail::maxValue(keyBits));
}
buildAndValidate(keys, keyBits, loadFactor);
}
}
TEST_F(QuotientMultiSetTest, UniformDistributionFullLoadFactor) {
const uint64_t numElements = 1 << 16;
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < numElements; idx++) {
uint64_t key = folly::Random::rand32(idx << 16, (idx + 1) << 16, rng);
keys.emplace_back(key);
}
buildAndValidate(keys, 32, 1.0);
}
TEST_F(QuotientMultiSetTest, Overflow) {
const uint64_t numElements = 1 << 12;
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < numElements; idx++) {
keys.emplace_back(idx);
keys.emplace_back(idx);
keys.emplace_back(idx);
}
buildAndValidate(keys, 12, 0.95);
}
TEST_F(QuotientMultiSetTest, RandomLengthRuns) {
const uint64_t numElements = 1 << 16;
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < (numElements >> 4); idx++) {
uint64_t key = folly::Random::rand32(rng);
uint64_t length = folly::Random::rand32(0, 10, rng);
for (uint64_t k = 0; k < length; k++) {
keys.emplace_back(key + k);
}
}
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, RunAcrossBlocks) {
const uint64_t numElements = 1 << 10;
std::vector<uint64_t> keys;
// Add keys with cluster size 137.
for (uint64_t idx = 0; idx < (numElements >> 4); idx++) {
uint64_t key = folly::Random::rand32(rng);
for (uint64_t k = 0; k < 136; k++) {
key += k;
keys.emplace_back(key);
}
}
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, PackAtHeadSlots) {
const uint64_t numElements = 1 << 12;
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < numElements; idx++) {
uint64_t key = folly::Random::rand32(idx << 8, (idx + 1) << 8, rng);
keys.emplace_back(key);
}
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, PackAtTailSlots) {
const uint64_t numElements = 1 << 12;
std::vector<uint64_t> keys;
uint64_t key = (1 << 30);
for (uint64_t idx = 0; idx < numElements; idx++) {
keys.emplace_back(key + idx);
}
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, KeysOnlyInHeadAndTail) {
const uint64_t numElements = 1 << 11;
std::vector<uint64_t> keys;
for (uint64_t idx = 0; idx < numElements; idx++) {
keys.emplace_back(idx);
}
uint64_t key = (1 << 30);
for (uint64_t idx = 0; idx < numElements; idx++) {
keys.emplace_back(key + idx);
}
buildAndValidate(keys, 32, 0.95);
}
TEST_F(QuotientMultiSetTest, RunendRightBeforeFirstOccupiedRunend) {
std::vector<uint64_t> keys;
// 60 ranges [0, 67] with occupied slot 20.
for (size_t idx = 0; idx < 68; idx++) {
keys.push_back(60);
}
// 60 ranges [68, 68] with occupied slot 66.
for (size_t idx = 0; idx < 1; idx++) {
keys.push_back(200);
}
// 60 ranges [69, 88] with occupied slot 83.
for (size_t idx = 0; idx < 20; idx++) {
keys.push_back(250);
}
buildAndValidate(keys, 8, 0.95);
}
#endif // FOLLY_QUOTIENT_MULTI_SET_SUPPORTED