* Copyright (c) Meta Platforms, Inc. and affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
#include <chrono>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <thread>
#include <vector>
#include <folly/Conv.h>
#include <folly/container/Array.h>
#include <folly/io/async/test/RequestContextHelper.h>
#include <folly/portability/GFlags.h>
#include <folly/synchronization/test/Barrier.h>
// Command-line flags: number of repetitions per benchmark (default 10) and
// number of operations per repetition (default 1000000).
DEFINE_int32(reps, 10, "number of reps");
DEFINE_int32(ops, 1000000, "number of operations per rep");
using namespace folly;
// Token passed as the key argument to the setContextData() calls in the
// benchmarks below.
RequestToken token("test");
// Runs one timed repetition with nthr worker threads.
//
// nthr: number of std::thread objects to create.
// fn:   callable supplied by the benchmark (runBench passes it through).
// Returns the steady_clock time elapsed between tbegin and tend, in
// nanoseconds.
//
// NOTE(review): the worker lambda body and the body of the completion loop
// are not visible in this view; presumably the workers wait on b1 and call
// fn(tid), and the loop joins each thread -- confirm.
template <typename Func>
inline uint64_t run_once(int nthr, const Func& fn) {
// Barrier sized for nthr + 1 participants (presumably the nthr workers plus
// this thread -- confirm).
folly::test::Barrier b1(nthr + 1);
std::vector<std::thread> thr(nthr);
for (int tid = 0; tid < nthr; ++tid) {
// tid is captured by value; everything else is captured by reference.
thr[tid] = std::thread([&, tid] {
/* begin time measurement */
auto const tbegin = std::chrono::steady_clock::now();
/* wait for completion */
for (int i = 0; i < nthr; ++i) {
/* end time measurement */
auto const tend = std::chrono::steady_clock::now();
auto const dur = tend - tbegin;
return std::chrono::duration_cast<std::chrono::nanoseconds>(dur).count();
// Repeats run_once(nthr, fn) up to FLAGS_reps times, prints per-operation
// statistics, and returns the minimum repetition time in nanoseconds.
//
// ops:  divisor used to convert the repetition times into per-operation
//       figures for printing.
// nthr: thread count forwarded to run_once.
// fn:   per-thread callable forwarded to run_once.
//
// Output: four columns, in the order max, avg, dev, min, each printed as
// "<value / ops> ns" with width 4, followed by std::endl.
//
// NOTE(review): ops is declared int here, while the bench_* callers pass a
// uint64_t -- the value is narrowed at the call.
template <typename Func>
uint64_t runBench(int ops, int nthr, const Func& fn) {
uint64_t reps = FLAGS_reps;
// Start min at the largest value so the first std::min() replaces it.
// NOTE(review): UINTMAX_MAX is the uintmax_t limit; the variable is uint64_t.
uint64_t min = UINTMAX_MAX;
uint64_t max = 0;
uint64_t sum = 0;
// One recorded duration per repetition; needed later for the deviation.
std::vector<uint64_t> durs(reps);
for (uint64_t r = 0; r < reps; ++r) {
uint64_t dur = run_once(nthr, fn);
durs[r] = dur;
sum += dur;
min = std::min(min, dur);
max = std::max(max, dur);
// if each rep takes too long run at least 3 reps
// 60000000000 ns == 60 s of accumulated run time.
const uint64_t minute = 60000000000ULL;
if (sum > minute && r >= 2) {
// Shrink reps to the number of repetitions actually completed so the
// statistics below only cover recorded durations.
reps = r + 1;
const std::string ns_unit = " ns";
uint64_t avg = sum / reps;
// The value reported in the last column and returned is the minimum.
uint64_t res = min;
// Deviation: varsum accumulates (reps * dur_r - sum)^2, which equals
// reps^2 * (dur_r - mean)^2. sqrt(varsum) * reps^-1.5 is therefore
// sqrt(sum_r (dur_r - mean)^2 / reps), computed without first rounding the
// mean to an integer.
uint64_t varsum = 0;
for (uint64_t r = 0; r < reps; ++r) {
auto term = int64_t(reps * durs[r]) - int64_t(sum);
// NOTE(review): term is int64_t, so term * term is a signed 64-bit multiply;
// it overflows once |term| exceeds roughly 3.04e9 -- confirm this cannot
// happen for the durations measured here.
varsum += term * term;
uint64_t dev = uint64_t(std::sqrt(varsum) * std::pow(reps, -1.5));
std::cout << " " << std::setw(4) << max / ops << ns_unit;
std::cout << " " << std::setw(4) << avg / ops << ns_unit;
std::cout << " " << std::setw(4) << dev / ops << ns_unit;
std::cout << " " << std::setw(4) << res / ops << ns_unit;
std::cout << std::endl;
return res;
// Benchmark driver: builds a per-thread callable and hands it to runBench.
//
// nthr: number of threads; also the stride of each thread's loop.
// ops:  total number of loop iterations across all threads.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_set_clearContextData(int nthr, uint64_t ops) {
auto fn = [&](int tid) {
RequestContextScopeGuard g;
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
for (uint64_t i = tid; i < ops; i += nthr) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(tid));
return runBench(ops, nthr, fn);
// Benchmark driver: builds a per-thread callable and hands it to runBench.
//
// nthr: number of threads; also the stride of each thread's loop.
// ops:  total number of loop iterations across all threads.
// hit:  when true, the setup step guarded by `if (hit)` runs before the loop.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_hasContextData(int nthr, uint64_t ops, bool hit) {
auto fn = [&](int tid) {
RequestContextScopeGuard g;
if (hit) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(tid));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
// NOTE(review): the loop body is not visible in this view.
for (uint64_t i = tid; i < ops; i += nthr) {
return runBench(ops, nthr, fn);
// Benchmark driver: builds a per-thread callable and hands it to runBench.
//
// nthr: number of threads; also the stride of each thread's loop.
// ops:  total number of loop iterations across all threads.
// hit:  when true, the setup step guarded by `if (hit)` runs before the loop.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_getContextData(int nthr, uint64_t ops, bool hit) {
auto fn = [&](int tid) {
RequestContextScopeGuard g;
if (hit) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(tid));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
// NOTE(review): the loop body is not visible in this view.
for (uint64_t i = tid; i < ops; i += nthr) {
return runBench(ops, nthr, fn);
// Benchmark driver: builds a per-thread callable and hands it to runBench.
//
// nthr:     number of threads; also the stride of each thread's loop.
// ops:      total number of loop iterations across all threads.
// nonempty: when true, the setup step guarded by `if (nonempty)` runs before
//           the loop.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_onSet(int nthr, uint64_t ops, bool nonempty) {
auto fn = [&](int tid) {
RequestContextScopeGuard g;
if (nonempty) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(tid));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
// NOTE(review): the loop body is not visible in this view.
for (uint64_t i = tid; i < ops; i += nthr) {
return runBench(ops, nthr, fn);
// Benchmark driver: builds a per-thread callable and hands it to runBench.
//
// nthr:     number of threads; also the stride of each thread's loop.
// ops:      total number of loop iterations across all threads.
// nonempty: when true, the setup step guarded by `if (nonempty)` runs before
//           the loop.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_onUnset(int nthr, uint64_t ops, bool nonempty) {
auto fn = [&](int tid) {
RequestContextScopeGuard g;
if (nonempty) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(tid));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
// NOTE(review): the loop body is not visible in this view.
for (uint64_t i = tid; i < ops; i += nthr) {
return runBench(ops, nthr, fn);
// Benchmark driver for RequestContext::setContext().
//
// nthr:     number of threads; also the stride of each thread's loop.
// ops:      total number of setContext() calls across all threads.
// nonempty: when true, each RequestContext created here gets one entry
//           stored under `token` (TestData(1) for the first, TestData(2) for
//           the second).
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_setContext(int nthr, uint64_t ops, bool nonempty) {
auto fn = [&](int tid) {
// First context object.
auto ctx = std::make_shared<RequestContext>();
if (nonempty) {
ctx->setContextData(token, std::make_unique<TestData>(1));
// Second context object; ctx now refers to this one.
ctx = std::make_shared<RequestContext>();
if (nonempty) {
ctx->setContextData(token, std::make_unique<TestData>(2));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
for (uint64_t i = tid; i < ops; i += nthr) {
// Hand ctx to setContext() and keep whatever it returns for the next
// iteration (presumably the previously installed context -- confirm against
// the folly API).
ctx = RequestContext::setContext(std::move(ctx));
return runBench(ops, nthr, fn);
// Benchmark driver that constructs a RequestContextScopeGuard from an
// existing context on every iteration.
//
// nthr:     number of threads; also the stride of each thread's loop.
// ops:      total number of guard constructions across all threads.
// nonempty: when true, the setup steps guarded by `if (nonempty)` run, one of
//           which stores TestData(2) under `token` in ctx.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_RequestContextScopeGuard(int nthr, uint64_t ops, bool nonempty) {
auto fn = [&](int tid) {
// Outer guard held for the lifetime of the lambda body.
RequestContextScopeGuard g1;
if (nonempty) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably a setContextData(token, ...) call -- confirm.
token, std::make_unique<TestData>(1));
// Separate context object that each loop iteration passes to g2.
auto ctx = std::make_shared<RequestContext>();
if (nonempty) {
ctx->setContextData(token, std::make_unique<TestData>(2));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
for (uint64_t i = tid; i < ops; i += nthr) {
RequestContextScopeGuard g2(ctx);
return runBench(ops, nthr, fn);
// Benchmark driver that constructs a ShallowCopyRequestContextScopeGuard on
// every iteration.
//
// nthr:    number of threads; also the stride of each thread's loop.
// ops:     total number of guard constructions across all threads.
// keep:    number of setup iterations; iteration i uses the string form of
//          1000 + i and TestData(i) as arguments.
// replace: when true, TestData(1) is stored under `token` in ctx and each
//          iteration constructs the guard with (token, TestData(2)); when
//          false, each iteration default-constructs the guard.
// Returns the value returned by runBench(ops, nthr, fn).
uint64_t bench_ShallowCopyRequestContextScopeGuard(
int nthr, uint64_t ops, int keep, bool replace) {
auto fn = [&](int tid) {
// Outer guard held for the lifetime of the lambda body.
RequestContextScopeGuard g1;
auto ctx = RequestContext::get();
for (int i = 0; i < keep; ++i) {
// NOTE(review): the call these arguments belong to is not visible in this
// view; presumably ctx->setContextData(...) -- confirm.
folly::to<std::string>(1000 + i), std::make_unique<TestData>(i));
if (replace) {
ctx->setContextData(token, std::make_unique<TestData>(1));
// Thread tid handles i = tid, tid + nthr, tid + 2 * nthr, ... below ops.
for (uint64_t i = tid; i < ops; i += nthr) {
ShallowCopyRequestContextScopeGuard g2(
token, std::make_unique<TestData>(2));
} else {
// Same iteration split as above, with a default-constructed guard.
for (uint64_t i = tid; i < ops; i += nthr) {
ShallowCopyRequestContextScopeGuard g2;
return runBench(ops, nthr, fn);
// Streams a separator row of '.' characters followed by std::endl.
// NOTE(review): the stream being written to is not visible in this view;
// presumably std::cout -- confirm.
void dottedLine() {
<< "........................................................................"
<< std::endl;
// Streams a separator row of '=' characters followed by std::endl.
// NOTE(review): the stream being written to is not visible in this view;
// presumably std::cout -- confirm.
void doubleLine() {
<< "========================================================================"
<< std::endl;
// Thread counts that benches() iterates over: 1 and 10.
// Note: the bench_* functions, run_once and runBench each take a parameter
// that is also named nthr, which shadows this constant inside them.
constexpr auto nthr = folly::make_array<int>(1, 10);
// Prints the run configuration (FLAGS_reps, FLAGS_ops), a sample command
// line and a column header, then for every thread count in nthr prints a
// banner and runs each benchmark with FLAGS_ops operations.
//
// Each row label is written before the corresponding bench_* call; the
// timing columns and the end of the row are written by runBench. The header
// column order (Max, Avg, Dev, Min) matches the order runBench prints.
void benches() {
std::cout << std::setw(2) << FLAGS_reps << " reps of " << std::setw(8)
<< FLAGS_ops << " operations\n";
std::cout << "$ numactl -N 1 $dir/request_context_benchmark\n";
// NOTE(review): the stream this header is written to is not visible in this
// view; presumably std::cout -- confirm.
<< "Test name Max time Avg time Dev time Min time"
<< std::endl;
for (int i : nthr) {
std::cout << "============================== " << std::setw(2) << i
<< " threads " << "==============================" << std::endl;
const uint64_t ops = FLAGS_ops;
// Lookups with data present under the token (hit == true).
std::cout << "hasContextData ";
bench_hasContextData(i, ops, true);
std::cout << "getContextData ";
bench_getContextData(i, ops, true);
// The remaining context benchmarks run with nonempty == true.
std::cout << "onSet ";
bench_onSet(i, ops, true);
std::cout << "onUnset ";
bench_onUnset(i, ops, true);
std::cout << "setContext ";
bench_setContext(i, ops, true);
std::cout << "RequestContextScopeGuard ";
bench_RequestContextScopeGuard(i, ops, true);
// Shallow-copy guard with replace == true: first with keep == 0, then with
// keep == 12.
std::cout << "ShallowCopyRequestC...-replace ";
bench_ShallowCopyRequestContextScopeGuard(i, ops, 0, true);
std::cout << "ShallowCopyReq...-keep&replace ";
bench_ShallowCopyRequestContextScopeGuard(i, ops, 12, true);
// Program entry point. Parses the command-line flags (--reps, --ops) with
// gflags before anything else runs.
// NOTE(review): the rest of the body is not visible in this view; presumably
// it calls benches() and returns 0 -- confirm.
int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
10 reps of 1000000 operations
$ numactl -N 1 $dir/request_context_benchmark
Test name Max time Avg time Dev time Min time
============================== 1 threads ==============================
hasContextData 7 ns 7 ns 0 ns 7 ns
getContextData 7 ns 7 ns 0 ns 7 ns
onSet 12 ns 12 ns 0 ns 12 ns
onUnset 12 ns 12 ns 0 ns 12 ns
setContext 46 ns 44 ns 1 ns 42 ns
RequestContextScopeGuard 113 ns 103 ns 3 ns 101 ns
ShallowCopyRequestC...-replace 213 ns 201 ns 5 ns 196 ns
ShallowCopyReq...-keep&replace 883 ns 835 ns 20 ns 814 ns
============================== 10 threads ==============================
hasContextData 1 ns 1 ns 0 ns 1 ns
getContextData 2 ns 1 ns 0 ns 1 ns
onSet 2 ns 2 ns 0 ns 1 ns
onUnset 2 ns 2 ns 0 ns 1 ns
setContext 11 ns 7 ns 2 ns 5 ns
RequestContextScopeGuard 22 ns 15 ns 5 ns 11 ns
ShallowCopyRequestC...-replace 48 ns 30 ns 11 ns 21 ns
ShallowCopyReq...-keep&replace 98 ns 93 ns 2 ns 91 ns