/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/synchronization/SmallLocks.h>
#include <atomic>
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include <glog/logging.h>
#include <folly/Random.h>
#include <folly/portability/Asm.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GMock.h>
#include <folly/portability/GTest.h>
#include <folly/portability/PThread.h>
#include <folly/portability/Unistd.h>
#include <folly/test/TestUtils.h>
using folly::MicroLock;
using folly::MicroSpinLock;
using folly::MSLGuard;
using folly::PicoSpinLock;
DEFINE_int64(
stress_test_seconds, 2, "Number of seconds for which to run stress tests");
namespace {
struct LockedVal {
int ar[1024];
MicroSpinLock lock;
LockedVal() {
lock.init();
memset(ar, 0, sizeof ar);
}
};
// Compile-time test for packed struct support (requires that both of
// these classes are POD).
FOLLY_PACK_PUSH
struct ignore1 {
MicroSpinLock msl;
int16_t foo;
} FOLLY_PACK_ATTR;
static_assert(sizeof(ignore1) == 3, "Size check failed");
static_assert(sizeof(MicroSpinLock) == 1, "Size check failed");
struct ignore2 {
PicoSpinLock<uint32_t> psl;
int16_t foo;
} FOLLY_PACK_ATTR;
static_assert(folly::kMscVer || sizeof(ignore2) == 6, "Size check failed");
FOLLY_PACK_POP
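// A corollary of the packed layout above (a sketch, not one of the original
// checks): PicoSpinLock steals a bit from the integer it guards rather than
// adding storage, so it is exactly the size of its underlying type.
static_assert(
    folly::kMscVer || sizeof(PicoSpinLock<uint32_t>) == 4,
    "Size check failed");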
LockedVal v;
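// Each writer takes v.lock, checks that the previous writer's memset is
// visible in full (every element equals the first), and then overwrites the
// whole array with a fresh random byte.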
void splock_test(std::atomic<bool>* go) {
CHECK(go);
auto rng = folly::ThreadLocalPRNG();
while (go->load(std::memory_order_relaxed)) {
folly::asm_volatile_pause();
MSLGuard g(v.lock);
EXPECT_THAT(v.ar, testing::Each(testing::Eq(v.ar[0])));
int byte = folly::Random::rand32(rng);
memset(v.ar, char(byte), sizeof v.ar);
}
}
template <class T>
struct PslTest {
PicoSpinLock<T> lock;
PslTest() { lock.init(); }
void doTest() {
using UT = typename std::make_unsigned<T>::type;
T ourVal = rand() % T(UT(1) << (sizeof(UT) * 8 - 1));
for (int i = 0; i < 100; ++i) {
std::lock_guard<PicoSpinLock<T>> guard(lock);
lock.setData(ourVal);
for (int n = 0; n < 10; ++n) {
folly::asm_volatile_pause();
EXPECT_EQ(lock.getData(), ourVal);
}
}
}
};
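// Run doTest() from many threads against a single shared lock: each thread
// must always read back exactly the value it stored while holding the lock.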
template <class T>
void doPslTest() {
PslTest<T> testObj;
const int nthrs = 17;
std::vector<std::thread> threads;
for (int i = 0; i < nthrs; ++i) {
threads.push_back(std::thread(&PslTest<T>::doTest, &testObj));
}
for (auto& t : threads) {
t.join();
}
}
struct TestClobber {
TestClobber() { lock_.init(); }
void go() {
std::lock_guard<MicroSpinLock> g(lock_);
// This bug depends on gcc register allocation and is very sensitive. We
// have to use DCHECK instead of EXPECT_*.
DCHECK(!lock_.try_lock());
}
private:
MicroSpinLock lock_;
};
} // namespace
TEST(SmallLocks, SpinLockCorrectness) {
EXPECT_EQ(sizeof(MicroSpinLock), 1);
  std::atomic<bool> go{true};
int nthrs = sysconf(_SC_NPROCESSORS_ONLN) * 2;
std::vector<std::thread> threads;
for (int i = 0; i < nthrs; ++i) {
threads.push_back(std::thread(splock_test, &go));
}
/* sleep override */ std::this_thread::sleep_for(std::chrono::seconds(1));
go.store(false, std::memory_order_relaxed);
for (auto& t : threads) {
t.join();
}
}
TEST(SmallLocks, PicoSpinCorrectness) {
doPslTest<int16_t>();
doPslTest<uint16_t>();
doPslTest<int32_t>();
doPslTest<uint32_t>();
doPslTest<int64_t>();
doPslTest<uint64_t>();
}
TEST(SmallLocks, PicoSpinSigned) {
typedef PicoSpinLock<int16_t, 0> Lock;
Lock val;
val.init(-4);
EXPECT_EQ(val.getData(), -4);
{
std::lock_guard<Lock> guard(val);
EXPECT_EQ(val.getData(), -4);
val.setData(-8);
EXPECT_EQ(val.getData(), -8);
}
EXPECT_EQ(val.getData(), -8);
}
TEST(SmallLocks, PicoSpinLockThreadSanitizer) {
SKIP_IF(!folly::kIsSanitizeThread) << "Enabled in TSAN mode only";
typedef PicoSpinLock<int16_t, 0> Lock;
{
Lock a;
Lock b;
a.init(-8);
b.init(-8);
{
std::lock_guard<Lock> ga(a);
std::lock_guard<Lock> gb(b);
}
{
std::lock_guard<Lock> gb(b);
EXPECT_DEATH(
[&]() {
std::lock_guard<Lock> ga(a);
// If halt_on_error is turned off for TSAN, then death would
// happen on exit, so give that a chance as well.
std::_Exit(1);
}(),
"Cycle in lock order graph");
}
}
}
TEST(SmallLocks, RegClobber) {
TestClobber().go();
}
static_assert(sizeof(MicroLock) == 1, "Size check failed");
namespace {
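// A one-shot gate: threads block in wait() until the main thread calls
// run(), which releases them all at once so they start contending together.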
struct SimpleBarrier {
SimpleBarrier() : lock_(), cv_(), ready_(false) {}
void wait() {
std::unique_lock<std::mutex> lockHeld(lock_);
while (!ready_) {
cv_.wait(lockHeld);
}
}
void run() {
{
std::unique_lock<std::mutex> lockHeld(lock_);
ready_ = true;
}
cv_.notify_all();
}
private:
std::mutex lock_;
std::condition_variable cv_;
bool ready_;
};
} // namespace
TEST(SmallLocks, MicroLock) {
volatile uint64_t counter = 0;
std::vector<std::thread> threads;
static const unsigned nrThreads = 20;
static const unsigned iterPerThread = 10000;
SimpleBarrier startBarrier;
// Embed the lock in a larger structure to ensure that we do not
// affect bits outside the ones MicroLock is defined to affect.
struct {
uint8_t a;
std::atomic<uint8_t> b;
MicroLock alock;
std::atomic<uint8_t> d;
} x;
uint8_t origB = 'b';
uint8_t origD = 'd';
x.a = 'a';
x.b = origB;
x.d = origD;
// This thread touches other parts of the host word to show that
// MicroLock does not interfere with memory outside of the byte
// it owns.
std::thread adjacentMemoryToucher = std::thread([&] {
startBarrier.wait();
for (unsigned iter = 0; iter < iterPerThread; ++iter) {
if (iter % 2) {
x.b++;
} else {
x.d++;
}
}
});
for (unsigned i = 0; i < nrThreads; ++i) {
threads.emplace_back([&] {
startBarrier.wait();
for (unsigned iter = 0; iter < iterPerThread; ++iter) {
x.alock.lock();
counter += 1;
// The occasional sleep makes it more likely that we'll
// exercise the futex-wait path inside MicroLock.
if (iter % 1000 == 0) {
struct timespec ts = {0, 10000};
(void)nanosleep(&ts, nullptr);
}
x.alock.unlock();
}
});
}
startBarrier.run();
  for (auto& t : threads) {
    t.join();
  }
adjacentMemoryToucher.join();
EXPECT_EQ(x.a, 'a');
EXPECT_EQ(x.b, (uint8_t)(origB + iterPerThread / 2));
EXPECT_EQ(x.d, (uint8_t)(origD + iterPerThread / 2));
EXPECT_EQ(counter, ((uint64_t)nrThreads * iterPerThread));
}
TEST(SmallLocks, MicroLockTryLock) {
MicroLock lock;
EXPECT_TRUE(lock.try_lock());
EXPECT_FALSE(lock.try_lock());
lock.unlock();
}
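// A minimal usage sketch (not part of the original suite): MicroLock
// provides lock()/try_lock()/unlock(), so it composes with std::lock_guard
// as well as with the explicit calls above. Assumes a default-constructed
// MicroLock starts out unlocked, as the preceding test relies on.
TEST(SmallLocks, MicroLockLockGuardExample) {
  MicroLock lock;
  {
    std::lock_guard<MicroLock> guard(lock);
    EXPECT_FALSE(lock.try_lock()); // held by the guard
  }
  EXPECT_TRUE(lock.try_lock()); // released when the guard went out of scope
  lock.unlock();
}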
TEST(SmallLocks, MicroLockWithData) {
MicroLock lock;
EXPECT_EQ(lock.load(std::memory_order_relaxed), 0);
EXPECT_EQ(lock.lockAndLoad(), 0);
lock.unlockAndStore(42);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 42);
EXPECT_EQ(lock.lockAndLoad(), 42);
lock.unlock();
EXPECT_EQ(lock.load(std::memory_order_relaxed), 42);
lock.store(12, std::memory_order_relaxed);
{
MicroLock::LockGuardWithData guard{lock};
EXPECT_EQ(guard.loadedValue(), 12);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 12);
guard.storeValue(24);
// Should not have immediate effect
EXPECT_EQ(guard.loadedValue(), 12);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 12);
}
EXPECT_EQ(lock.load(std::memory_order_relaxed), 24);
// Drop the two most significant bits
lock.lock();
lock.unlockAndStore(0b10011001);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 0b00011001);
lock.store(0b11101110, std::memory_order_relaxed);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 0b00101110);
}
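// A sketch of the truncation observed above (an observation from this test,
// not a documented guarantee): stored values behave as if masked to the low
// six bits, with the top two bits of the byte reserved for lock state.
static_assert((0b10011001 & 0x3f) == 0b00011001, "truncation sketch");
static_assert((0b11101110 & 0x3f) == 0b00101110, "truncation sketch");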
TEST(SmallLocks, MicroLockDataAlignment) {
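  // Place the lock byte at a non-zero offset inside a word-aligned struct so
  // that the data load/store path is exercised away from the word boundary.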
struct alignas(uint32_t) Thing {
uint8_t padding1[2];
MicroLock lock;
uint8_t padding2;
} thing;
auto& lock = thing.lock;
EXPECT_EQ(lock.lockAndLoad(), 0);
lock.unlockAndStore(60);
EXPECT_EQ(lock.load(std::memory_order_relaxed), 60);
}
namespace {
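// Each thread increments and then decrements a shared counter while holding
// the mutex; if mutual exclusion ever failed, a concurrent thread would
// observe a nonzero count and the EXPECT_EQ checks below would fire.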
template <typename Mutex, typename Duration>
void simpleStressTest(Duration duration, int numThreads) {
auto&& mutex = Mutex{};
auto&& data = std::atomic<std::uint64_t>{0};
auto&& threads = std::vector<std::thread>{};
  auto&& stop = std::atomic<bool>{false};
for (auto i = 0; i < numThreads; ++i) {
threads.emplace_back([&mutex, &data, &stop] {
while (!stop.load(std::memory_order_relaxed)) {
auto lck = std::unique_lock<Mutex>{mutex};
EXPECT_EQ(data.fetch_add(1, std::memory_order_relaxed), 0);
EXPECT_EQ(data.fetch_sub(1, std::memory_order_relaxed), 1);
}
});
}
std::this_thread::sleep_for(duration);
stop.store(true);
for (auto& thread : threads) {
thread.join();
}
}
} // namespace
TEST(SmallLocks, MicroSpinLockStressTestLockTwoThreads) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
simpleStressTest<MicroSpinLock>(duration, 2);
}
TEST(SmallLocks, MicroSpinLockStressTestLockHardwareConcurrency) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
auto threads = std::thread::hardware_concurrency();
simpleStressTest<MicroSpinLock>(duration, threads);
}
TEST(SmallLocks, PicoSpinLockStressTestLockTwoThreads) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
simpleStressTest<PicoSpinLock<std::uint16_t>>(duration, 2);
}
TEST(SmallLocks, PicoSpinLockStressTestLockHardwareConcurrency) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
auto threads = std::thread::hardware_concurrency();
simpleStressTest<PicoSpinLock<std::uint16_t>>(duration, threads);
}
namespace {
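// Adapts a mutex so that lock() spins on try_lock(), letting the stress
// test above exercise the try_lock() acquisition path.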
template <typename Mutex>
class MutexWrapper {
public:
void lock() {
while (!mutex_.try_lock()) {
}
}
void unlock() { mutex_.unlock(); }
Mutex mutex_;
};
template <typename Mutex, typename Duration>
void simpleStressTestTryLock(Duration duration, int numThreads) {
simpleStressTest<MutexWrapper<Mutex>>(duration, numThreads);
}
} // namespace
TEST(SmallLocks, MicroSpinLockStressTestTryLockTwoThreads) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
simpleStressTestTryLock<MicroSpinLock>(duration, 2);
}
TEST(SmallLocks, MicroSpinLockStressTestTryLockHardwareConcurrency) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
auto threads = std::thread::hardware_concurrency();
simpleStressTestTryLock<MicroSpinLock>(duration, threads);
}
TEST(SmallLocks, MicroSpinLockThreadSanitizer) {
SKIP_IF(!folly::kIsSanitizeThread) << "Enabled in TSAN mode only";
uint8_t val = 0;
static_assert(sizeof(uint8_t) == sizeof(MicroSpinLock), "sanity check");
  // Make sure TSAN also handles the case where the same lock is repeatedly
  // reinitialized by storing a value directly.
for (int i = 0; i < 10; i++) {
val = 0;
std::lock_guard<MicroSpinLock> g(*reinterpret_cast<MicroSpinLock*>(&val));
}
{
MicroSpinLock a;
MicroSpinLock b;
a.init();
b.init();
{
std::lock_guard<MicroSpinLock> ga(a);
std::lock_guard<MicroSpinLock> gb(b);
}
{
std::lock_guard<MicroSpinLock> gb(b);
EXPECT_DEATH(
[&]() {
std::lock_guard<MicroSpinLock> ga(a);
// If halt_on_error is turned off for TSAN, then death would
// happen on exit, so give that a chance as well.
std::_Exit(1);
}(),
"Cycle in lock order graph");
}
}
{
uint8_t a = 0;
uint8_t b = 0;
{
std::lock_guard<MicroSpinLock> ga(*reinterpret_cast<MicroSpinLock*>(&a));
std::lock_guard<MicroSpinLock> gb(*reinterpret_cast<MicroSpinLock*>(&b));
}
a = 0;
b = 0;
{
std::lock_guard<MicroSpinLock> gb(*reinterpret_cast<MicroSpinLock*>(&b));
EXPECT_DEATH(
[&]() {
std::lock_guard<MicroSpinLock> ga(
*reinterpret_cast<MicroSpinLock*>(&a));
// If halt_on_error is turned off for TSAN, then death would
// happen on exit, so give that a chance as well.
std::_Exit(1);
}(),
"Cycle in lock order graph");
}
}
}
TEST(SmallLocks, PicoSpinLockStressTestTryLockTwoThreads) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
simpleStressTestTryLock<PicoSpinLock<std::uint16_t>>(duration, 2);
}
TEST(SmallLocks, PicoSpinLockStressTestTryLockHardwareConcurrency) {
auto duration = std::chrono::seconds{FLAGS_stress_test_seconds};
auto threads = std::thread::hardware_concurrency();
simpleStressTestTryLock<PicoSpinLock<std::uint16_t>>(duration, threads);
}