/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/memory/ThreadCachedArena.h>
#include <algorithm>
#include <iterator>
#include <map>
#include <mutex>
#include <random>
#include <thread>
#include <unordered_map>
#include <glog/logging.h>
#include <folly/Benchmark.h>
#include <folly/Memory.h>
#include <folly/Range.h>
#include <folly/lang/Align.h>
#include <folly/portability/GTest.h>
using namespace folly;
namespace {
class ArenaTester {
public:
explicit ArenaTester(ThreadCachedArena& arena) : arena_(&arena) {}
void allocate(size_t count, size_t maxSize);
void verify();
void merge(ArenaTester&& other);
private:
std::mutex mergeMutex_;
std::vector<std::pair<uint8_t, Range<uint8_t*>>> areas_;
ThreadCachedArena* arena_;
};
void ArenaTester::allocate(size_t count, size_t maxSize) {
// Allocate chunks of memory of random sizes
std::random_device rd{};
std::mt19937 rnd{rd()};
std::uniform_int_distribution<uint32_t> sizeDist(1, maxSize - 1);
areas_.clear();
areas_.reserve(count);
for (size_t i = 0; i < count; i++) {
size_t size = sizeDist(rnd);
uint8_t* p = static_cast<uint8_t*>(arena_->allocate(size));
areas_.emplace_back(uint8_t(rnd() & 0xff), Range<uint8_t*>(p, size));
}
// Fill each area with a different value, to prove that they don't overlap
// Fill in random order.
std::shuffle(areas_.begin(), areas_.end(), rnd);
for (auto& p : areas_) {
std::fill(p.second.begin(), p.second.end(), p.first);
}
}
void ArenaTester::verify() {
for (auto& p : areas_) {
for (auto v : p.second) {
EXPECT_EQ(p.first, v);
}
}
}
void ArenaTester::merge(ArenaTester&& other) {
{
std::lock_guard<std::mutex> lock(mergeMutex_);
std::move(
other.areas_.begin(), other.areas_.end(), std::back_inserter(areas_));
}
other.areas_.clear();
}
} // namespace
TEST(ThreadCachedArena, BlockSize) {
static const size_t alignment = max_align_v;
static const size_t requestedBlockSize = 64;
ThreadCachedArena arena(requestedBlockSize);
size_t blockSize = alignment;
uint8_t* prev = static_cast<uint8_t*>(arena.allocate(1));
// Keep allocating until we're no longer one single alignment away from the
// previous allocation -- that's when we've gotten to the next block.
uint8_t* p;
while ((p = static_cast<uint8_t*>(arena.allocate(1))) == prev + alignment) {
prev = p;
blockSize += alignment;
}
VLOG(1) << "Requested block size: " << requestedBlockSize
<< ", actual: " << blockSize;
EXPECT_LE(requestedBlockSize, blockSize);
}
TEST(ThreadCachedArena, SingleThreaded) {
static const size_t requestedBlockSize = 64;
ThreadCachedArena arena(requestedBlockSize);
EXPECT_EQ(arena.totalSize(), sizeof(ThreadCachedArena));
ArenaTester tester(arena);
tester.allocate(100, 100 << 10);
tester.verify();
EXPECT_GT(arena.totalSize(), sizeof(ThreadCachedArena));
}
TEST(ThreadCachedArena, MultiThreaded) {
static const size_t requestedBlockSize = 64;
ThreadCachedArena arena(requestedBlockSize);
ArenaTester mainTester(arena);
// Do this twice, to catch the possibility that memory from the first
// round gets freed
static const size_t numThreads = 20;
for (size_t i = 0; i < 2; i++) {
std::vector<std::thread> threads;
threads.reserve(numThreads);
for (size_t j = 0; j < numThreads; j++) {
threads.emplace_back([&arena, &mainTester]() {
ArenaTester tester(arena);
tester.allocate(500, 1 << 10);
tester.verify();
mainTester.merge(std::move(tester));
});
}
for (auto& t : threads) {
t.join();
}
}
mainTester.verify();
}
TEST(ThreadCachedArena, ThreadCachedArenaAllocator) {
using Map = std::unordered_map<
int,
int,
std::hash<int>,
std::equal_to<int>,
ThreadCachedArenaAllocator<std::pair<const int, int>>>;
static const size_t requestedBlockSize = 64;
ThreadCachedArena arena(requestedBlockSize);
Map map{
0,
std::hash<int>(),
std::equal_to<int>(),
ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
for (int i = 0; i < 1000; i++) {
map[i] = i;
}
for (int i = 0; i < 1000; i++) {
EXPECT_EQ(i, map[i]);
}
}
namespace {
static const int kNumValues = 10000;
BENCHMARK(bmUMStandard, iters) {
using Map = std::unordered_map<int, int>;
while (iters--) {
Map map{0};
for (int i = 0; i < kNumValues; i++) {
map[i] = i;
}
}
}
BENCHMARK(bmUMArena, iters) {
using Map = std::unordered_map<
int,
int,
std::hash<int>,
std::equal_to<int>,
ThreadCachedArenaAllocator<std::pair<const int, int>>>;
while (iters--) {
ThreadCachedArena arena;
Map map{
0,
std::hash<int>(),
std::equal_to<int>(),
ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
for (int i = 0; i < kNumValues; i++) {
map[i] = i;
}
}
}
BENCHMARK_DRAW_LINE();
BENCHMARK(bmMStandard, iters) {
using Map = std::map<int, int>;
while (iters--) {
Map map;
for (int i = 0; i < kNumValues; i++) {
map[i] = i;
}
}
}
BENCHMARK_DRAW_LINE();
BENCHMARK(bmMArena, iters) {
using Map = std::map<
int,
int,
std::less<int>,
ThreadCachedArenaAllocator<std::pair<const int, int>>>;
while (iters--) {
ThreadCachedArena arena;
Map map{
std::less<int>(),
ThreadCachedArenaAllocator<std::pair<const int, int>>(arena)};
for (int i = 0; i < kNumValues; i++) {
map[i] = i;
}
}
}
BENCHMARK_DRAW_LINE();
} // namespace
// Benchmark Iters Total t t/iter iter/sec
// ----------------------------------------------------------------------------
// Comparing benchmarks: bmUMStandard,bmUMArena
// + 143% bmUMStandard 1570 2.005 s 1.277 ms 782.9
// * bmUMArena 3817 2.003 s 524.7 us 1.861 k
// ----------------------------------------------------------------------------
// Comparing benchmarks: bmMStandard,bmMArena
// +79.0% bmMStandard 1197 2.009 s 1.678 ms 595.8
// * bmMArena 2135 2.002 s 937.6 us 1.042 k
// ----------------------------------------------------------------------------
int main(int argc, char* argv[]) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
auto ret = RUN_ALL_TESTS();
if (!ret && FLAGS_benchmark) {
folly::runBenchmarks();
}
return ret;
}