/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/SharedMutex.h>

#include <stdlib.h>

#include <array>
#include <chrono>
#include <mutex>
#include <optional>
#include <random>
#include <shared_mutex> // std::shared_lock
#include <system_error>
#include <thread>
#include <vector>

#include <boost/thread/shared_mutex.hpp>

#include <folly/Benchmark.h>
#include <folly/MPMCQueue.h>
#include <folly/fibers/Baton.h>
#include <folly/fibers/TimedMutex.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/synchronization/RWSpinLock.h>
#include <folly/test/DeterministicSchedule.h>
#include <folly/test/TestUtils.h>

using namespace folly;
using namespace folly::test;
using namespace std;
using namespace std::chrono;

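// Policy used by the deterministic lock variants below: zeroing the spin
// and soft-yield budgets makes every contended acquisition block
// immediately, presumably so that all waiting is visible to
// DeterministicSchedule rather than hidden inside a spin loop.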
struct DSharedMutexPolicy : SharedMutexPolicyDefault {
  static constexpr uint64_t max_spin_cycles = 0;
  static constexpr uint32_t max_soft_yield_count = 0;
};
using DSched = DeterministicSchedule;
using DSharedMutexReadPriority =
    SharedMutexImpl<true, void, DeterministicAtomic, DSharedMutexPolicy>;
using DSharedMutexWritePriority =
    SharedMutexImpl<false, void, DeterministicAtomic, DSharedMutexPolicy>;

template <typename Lock>
void runBasicTest() {
  Lock lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  SharedMutexToken token3;

  EXPECT_TRUE(lock.eligible_for_lock_elision());
  EXPECT_TRUE(lock.try_lock());
  EXPECT_FALSE(lock.eligible_for_lock_elision());
  EXPECT_FALSE(lock.try_lock());
  EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
  EXPECT_FALSE(lock.try_lock_shared(token1));
  lock.unlock();

  EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
  EXPECT_TRUE(lock.try_lock_shared(token1));
  EXPECT_FALSE(lock.eligible_for_lock_elision());
  EXPECT_FALSE(lock.try_lock());
  EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
  EXPECT_TRUE(lock.try_lock_shared(token2));
  lock.lock_shared(token3);
  lock.unlock_shared(token3);
  lock.unlock_shared(token2);
  lock.unlock_shared(token1);

  lock.lock();
  lock.unlock();

  lock.lock_shared(token1);
  lock.lock_shared(token2);
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);

  lock.lock();
  lock.unlock_and_lock_shared(token1);
  lock.lock_shared(token2);
  lock.unlock_shared(token2);
  lock.unlock_shared(token1);
}

TEST(SharedMutex, basic) {
  runBasicTest<SharedMutexReadPriority>();
  runBasicTest<SharedMutexWritePriority>();
  runBasicTest<SharedMutexSuppressTSAN>();
  runBasicTest<SharedMutexTracked>();
}

template <typename Lock>
void runBasicHoldersTest() {
  Lock lock;
  SharedMutexToken token;

  {
    // create an exclusive write lock via holder
    std::unique_lock holder(lock);
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via move constructor
    std::unique_lock holder2(std::move(holder));
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // move ownership to another write holder via assign operator
    std::unique_lock holder3(lock, std::defer_lock);
    holder3 = std::move(holder2);
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade from exclusive to upgrade lock via move constructor
    folly::upgrade_lock holder4(
        folly::transition_lock<folly::upgrade_lock>(holder3));

    // ensure we can lock from a shared source
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);

    // promote from upgrade to exclusive lock via move constructor
    std::unique_lock holder5(folly::transition_lock<std::unique_lock>(holder4));
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
    EXPECT_FALSE(lock.try_lock_shared(token));

    // downgrade exclusive to shared lock via move constructor
    std::shared_lock holder6(folly::transition_lock<std::shared_lock>(holder5));

    // ensure we can lock from another shared source
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
    EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
    EXPECT_TRUE(lock.try_lock_shared(token));
    lock.unlock_shared(token);
  }

  {
    std::unique_lock holder(lock);
    EXPECT_FALSE(lock.eligible_for_lock_elision());
    EXPECT_FALSE(lock.try_lock());
  }

  {
    std::shared_lock holder(lock);
    std::shared_lock holder2(lock);
    folly::upgrade_lock holder3(lock);
  }

  {
    folly::upgrade_lock holder(lock);
    std::shared_lock holder2(lock);
    std::shared_lock holder3(folly::transition_lock<std::shared_lock>(holder));
  }
}

TEST(SharedMutex, basicHolders) {
  runBasicHoldersTest<SharedMutexReadPriority>();
  runBasicHoldersTest<SharedMutexWritePriority>();
  runBasicHoldersTest<SharedMutexSuppressTSAN>();
  runBasicHoldersTest<SharedMutexTracked>();
}
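
// A minimal illustrative sketch (not part of the upstream suite) of the
// RAII pattern the holder tests above exercise: std::unique_lock for
// exclusive access and std::shared_lock for shared access to data guarded
// by a SharedMutex.
TEST(SharedMutex, exampleHolderUsageSketch) {
  SharedMutex mutex;
  int value = 0;
  {
    std::unique_lock guard(mutex); // exclusive: safe to write
    value = 42;
  }
  {
    std::shared_lock guard(mutex); // shared: concurrent readers allowed
    EXPECT_EQ(42, value);
  }
}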

template <typename Lock>
void runManyReadLocksTestWithTokens() {
  Lock lock;

  vector<SharedMutexToken> tokens;
  for (int i = 0; i < 1000; ++i) {
    tokens.emplace_back();
    EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
    EXPECT_TRUE(lock.try_lock_shared(tokens.back()));
  }
  for (auto& token : tokens) {
    lock.unlock_shared(token);
  }

  EXPECT_TRUE(lock.try_lock());
  lock.unlock();
}

TEST(SharedMutex, manyReadLocksWithTokens) {
  // This test fails in an assertion in the TSAN library because there are too
  // many mutexes
  SKIP_IF(folly::kIsSanitizeThread);
  runManyReadLocksTestWithTokens<SharedMutexReadPriority>();
  runManyReadLocksTestWithTokens<SharedMutexWritePriority>();
  runManyReadLocksTestWithTokens<SharedMutexSuppressTSAN>();
  runManyReadLocksTestWithTokens<SharedMutexTracked>();
}

template <typename Lock>
void runManyReadLocksTestWithoutTokens() {
  Lock lock;

  for (int i = 0; i < 1000; ++i) {
    EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
    EXPECT_TRUE(lock.try_lock_shared());
  }
  for (int i = 0; i < 1000; ++i) {
    lock.unlock_shared();
  }

  EXPECT_TRUE(lock.try_lock());
  lock.unlock();
}

TEST(SharedMutex, manyReadLocksWithoutTokens) {
  // This test fails in an assertion in the TSAN library because there are too
  // many mutexes
  SKIP_IF(folly::kIsSanitizeThread);
  runManyReadLocksTestWithoutTokens<SharedMutexReadPriority>();
  runManyReadLocksTestWithoutTokens<SharedMutexWritePriority>();
  runManyReadLocksTestWithoutTokens<SharedMutexSuppressTSAN>();
  runManyReadLocksTestWithoutTokens<SharedMutexTracked>();
}

template <typename Lock>
void runTimeoutInPastTest() {
  Lock lock;

  EXPECT_TRUE(lock.try_lock_for(milliseconds(0)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_for(milliseconds(-1)));
  lock.unlock();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(0)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_shared_for(milliseconds(-1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(system_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(system_clock::now() - milliseconds(1)));
  lock.unlock_shared();
  EXPECT_TRUE(lock.try_lock_until(steady_clock::now() - milliseconds(1)));
  lock.unlock();
  EXPECT_TRUE(
      lock.try_lock_shared_until(steady_clock::now() - milliseconds(1)));
  lock.unlock_shared();
}

TEST(SharedMutex, timeoutInPast) {
  runTimeoutInPastTest<SharedMutexReadPriority>();
  runTimeoutInPastTest<SharedMutexWritePriority>();
  runTimeoutInPastTest<SharedMutexSuppressTSAN>();
  runTimeoutInPastTest<SharedMutexTracked>();
}

template <class Func>
bool funcHasDuration(milliseconds expectedDuration, Func func) {
  // elapsed time should eventually fall within expectedDuration +- 25%
  for (int tries = 0; tries < 100; ++tries) {
    auto start = steady_clock::now();
    func();
    auto elapsed = steady_clock::now() - start;
    if (elapsed > expectedDuration - expectedDuration / 4 &&
        elapsed < expectedDuration + expectedDuration / 4) {
      return true;
    }
  }
  return false;
}

template <typename Lock>
void runFailingTryTimeoutTest() {
  Lock lock;
  lock.lock();
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_for(milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_upgrade_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        steady_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    typename Lock::Token token;
    EXPECT_FALSE(lock.try_lock_shared_until(
        system_clock::now() + milliseconds(10), token));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(
        lock.try_lock_upgrade_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock();

  lock.lock_shared();
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_for(milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(steady_clock::now() + milliseconds(10)));
  }));
  EXPECT_TRUE(funcHasDuration(milliseconds(10), [&] {
    EXPECT_FALSE(lock.try_lock_until(system_clock::now() + milliseconds(10)));
  }));
  lock.unlock_shared();

  lock.lock();
  for (int p = 0; p < 8; ++p) {
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
  lock.unlock();

  for (int p = 0; p < 8; ++p) {
    std::shared_lock holder1(lock);
    std::shared_lock holder2(lock);
    std::shared_lock holder3(lock);
    EXPECT_FALSE(lock.try_lock_for(nanoseconds(1 << p)));
  }
}

TEST(SharedMutex, failingTryTimeout) {
  runFailingTryTimeoutTest<SharedMutexReadPriority>();
  runFailingTryTimeoutTest<SharedMutexWritePriority>();
  runFailingTryTimeoutTest<SharedMutexSuppressTSAN>();
  runFailingTryTimeoutTest<SharedMutexTracked>();
}

template <typename Lock>
void runBasicUpgradeTest() {
  Lock lock;
  typename Lock::Token token1;
  typename Lock::Token token2;

  EXPECT_TRUE(lock.eligible_for_lock_upgrade_elision());
  lock.lock_upgrade();
  EXPECT_FALSE(lock.eligible_for_lock_upgrade_elision());
  EXPECT_FALSE(lock.eligible_for_lock_elision());
  EXPECT_FALSE(lock.try_lock());
  EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_shared(token1);
  lock.unlock_upgrade();

  EXPECT_TRUE(lock.eligible_for_lock_upgrade_elision());
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock();
  EXPECT_FALSE(lock.eligible_for_lock_shared_elision());
  EXPECT_FALSE(lock.try_lock_shared(token1));
  lock.unlock();

  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token1);
  lock.lock_upgrade();
  lock.unlock_upgrade_and_lock_shared(token2);
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);

  lock.lock();
  lock.unlock_and_lock_upgrade();
  EXPECT_TRUE(lock.eligible_for_lock_shared_elision());
  EXPECT_TRUE(lock.try_lock_shared(token1));
  lock.unlock_upgrade();
  lock.unlock_shared(token1);
}

TEST(SharedMutex, basicUpgradeTests) {
  runBasicUpgradeTest<SharedMutexReadPriority>();
  runBasicUpgradeTest<SharedMutexWritePriority>();
  runBasicUpgradeTest<SharedMutexSuppressTSAN>();
  runBasicUpgradeTest<SharedMutexTracked>();
}
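
// An illustrative sketch (not part of the upstream suite) of the
// read-then-maybe-write pattern that the upgrade state is designed for:
// only one thread may hold the upgrade lock at a time, but it coexists
// with shared locks until it is promoted to exclusive.
TEST(SharedMutex, exampleUpgradePatternSketch) {
  SharedMutex mutex;
  int value = 0;
  mutex.lock_upgrade(); // inspect with intent to maybe modify
  if (value == 0) {
    mutex.unlock_upgrade_and_lock(); // promote to exclusive
    value = 1;
    mutex.unlock();
  } else {
    mutex.unlock_upgrade();
  }
  EXPECT_EQ(1, value);
}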

TEST(SharedMutex, readHasPrio) {
  SharedMutexReadPriority lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  lock.lock_shared(token1);
  bool exclusiveAcquired = false;
  auto writer = thread([&] {
    lock.lock();
    exclusiveAcquired = true;
    lock.unlock();
  });

  // lock() can't complete until we unlock token1, but it should stake
  // its claim with regard to other exclusive or upgrade locks.  We can
  // use try_lock_upgrade to poll for that eventuality.
  while (lock.try_lock_upgrade()) {
    lock.unlock_upgrade();
    this_thread::yield();
  }
  EXPECT_FALSE(exclusiveAcquired);

  // Even though lock() is stuck, we should still be able to get token2
  EXPECT_TRUE(lock.try_lock_shared(token2));
  lock.unlock_shared(token1);
  lock.unlock_shared(token2);
  writer.join();
  EXPECT_TRUE(exclusiveAcquired);
}

TEST(SharedMutex, writeHasPrio) {
  SharedMutexWritePriority lock;
  SharedMutexToken token1;
  SharedMutexToken token2;
  lock.lock_shared(token1);
  auto writer = thread([&] {
    lock.lock();
    lock.unlock();
  });

  // eventually lock() should block readers
  while (lock.try_lock_shared(token2)) {
    lock.unlock_shared(token2);
    this_thread::yield();
  }

  lock.unlock_shared(token1);
  writer.join();
}

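// The Locker policy types below let the same test/benchmark templates
// drive the token-based SharedMutex interface, the plain
// lock/unlock/lock_shared/unlock_shared interface, and (via EnterLocker)
// types that expose enter()/leave()-style calls instead.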
struct TokenLocker {
  SharedMutexToken token;

  template <typename T>
  void lock(T* lockable) {
    lockable->lock();
  }

  template <typename T>
  void unlock(T* lockable) {
    lockable->unlock();
  }

  template <typename T>
  void lock_shared(T* lockable) {
    lockable->lock_shared(token);
  }

  template <typename T>
  void unlock_shared(T* lockable) {
    lockable->unlock_shared(token);
  }
};

struct Locker {
  template <typename T>
  void lock(T* lockable) {
    lockable->lock();
  }

  template <typename T>
  void unlock(T* lockable) {
    lockable->unlock();
  }

  template <typename T>
  void lock_shared(T* lockable) {
    lockable->lock_shared();
  }

  template <typename T>
  void unlock_shared(T* lockable) {
    lockable->unlock_shared();
  }
};

struct EnterLocker {
  template <typename T>
  void lock(T* lockable) {
    lockable->lock(0);
  }

  template <typename T>
  void unlock(T* lockable) {
    lockable->unlock();
  }

  template <typename T>
  void lock_shared(T* lockable) {
    lockable->enter(0);
  }

  template <typename T>
  void unlock_shared(T* lockable) {
    lockable->leave();
  }
};

struct PosixRWLock {
  pthread_rwlock_t lock_;

  PosixRWLock() { pthread_rwlock_init(&lock_, nullptr); }

  ~PosixRWLock() { pthread_rwlock_destroy(&lock_); }

  void lock() { pthread_rwlock_wrlock(&lock_); }

  void unlock() { pthread_rwlock_unlock(&lock_); }

  void lock_shared() { pthread_rwlock_rdlock(&lock_); }

  void unlock_shared() { pthread_rwlock_unlock(&lock_); }
};

struct PosixMutex {
  pthread_mutex_t lock_;

  PosixMutex() { pthread_mutex_init(&lock_, nullptr); }

  ~PosixMutex() { pthread_mutex_destroy(&lock_); }

  void lock() { pthread_mutex_lock(&lock_); }

  void unlock() { pthread_mutex_unlock(&lock_); }

  void lock_shared() { pthread_mutex_lock(&lock_); }

  void unlock_shared() { pthread_mutex_unlock(&lock_); }
};

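// Benchmark harness for the 100%-read workloads: numThreads threads split
// numOps shared acquisitions between them, either all contending on a
// single cache-line-padded global lock or, with useSeparateLocks, each
// using a private lock to measure the uncontended fast path.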
template <template <typename> class Atom, typename Lock, typename Locker>
static void runContendedReaders(
    size_t numOps, size_t numThreads, bool useSeparateLocks) {
  struct alignas(hardware_destructive_interference_size)
      GlobalLockAndProtectedValue {
    Lock globalLock;
    int valueProtectedByLock = 10;
  };
  GlobalLockAndProtectedValue padded;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &(padded.globalLock);
        Locker locker;
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          locker.lock_shared(lock);
          // note: folly::doNotOptimizeAway reads from and writes to its
          // arg, so the following two lines are very different from a call
          // to folly::doNotOptimizeAway(valueProtectedByLock);
          auto copy = padded.valueProtectedByLock;
          folly::doNotOptimizeAway(copy);
          locker.unlock_shared(lock);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}

static void folly_rwspin_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, RWSpinLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_wr_pri_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_w_bare_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_rd_pri_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, useSeparateLocks);
}

static void shmtx_r_bare_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void folly_ticket_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void boost_shared_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void pthrd_rwlock_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<atomic, PosixRWLock, Locker>(
      numOps, numThreads, useSeparateLocks);
}

static void timed_wr_pri_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<
      atomic,
      fibers::TimedRWMutexWritePriority<fibers::Baton>,
      Locker>(numOps, numThreads, useSeparateLocks);
}

static void timed_rd_pri_reads(
    uint32_t numOps, size_t numThreads, bool useSeparateLocks) {
  runContendedReaders<
      atomic,
      fibers::TimedRWMutexReadPriority<fibers::Baton>,
      Locker>(numOps, numThreads, useSeparateLocks);
}

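// Benchmark harness for mixed workloads: each operation is a write with
// probability writeFraction.  Writers increment the protected value under
// the exclusive lock; readers just load it under the shared lock.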
template <template <typename> class Atom, typename Lock, typename Locker>
static void runMixed(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  struct alignas(hardware_destructive_interference_size)
      GlobalLockAndProtectedValue {
    Lock globalLock;
    int valueProtectedByLock = 0;
  };
  GlobalLockAndProtectedValue padded;
  Atom<bool> go(false);
  Atom<bool>* goPtr = &go; // workaround for clang bug
  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        std::minstd_rand engine;
        engine.seed(t);

        long writeThreshold = to_integral(writeFraction * 0x7fffffff);
        Lock privateLock;
        Lock* lock = useSeparateLocks ? &privateLock : &(padded.globalLock);
        Locker locker;
        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          long randVal = engine();
          bool writeOp = randVal < writeThreshold;
          if (writeOp) {
            locker.lock(lock);
            if (!useSeparateLocks) {
              ++(padded.valueProtectedByLock);
            }
            locker.unlock(lock);
          } else {
            locker.lock_shared(lock);
            auto v = padded.valueProtectedByLock;
            folly::doNotOptimizeAway(v);
            locker.unlock_shared(lock);
          }
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}

static void folly_rwspin(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, RWSpinLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_wr_pri(
    uint32_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_w_bare(
    uint32_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, SharedMutexWritePriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_rd_pri(
    uint32_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void shmtx_r_bare(
    uint32_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, SharedMutexReadPriority, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void folly_ticket(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, RWTicketSpinLock64, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void boost_shared(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, boost::shared_mutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void pthrd_rwlock(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, PosixRWLock, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void pthrd_mutex_(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, PosixMutex, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void timed_wr_pri(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, fibers::TimedRWMutexWritePriority<fibers::Baton>, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

static void timed_rd_pri(
    size_t numOps,
    size_t numThreads,
    double writeFraction,
    bool useSeparateLocks) {
  runMixed<atomic, fibers::TimedRWMutexReadPriority<fibers::Baton>, Locker>(
      numOps, numThreads, writeFraction, useSeparateLocks);
}

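// Stress test that drives a random mix of every lock operation while
// maintaining shared counters of how many exclusive, upgrade, and shared
// holds are outstanding, and asserts the basic invariants: an exclusive
// holder excludes all other holders, and at most one upgrade holder
// exists at a time.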
template <typename Lock, template <typename> class Atom>
static void runAllAndValidate(size_t numOps, size_t numThreads) {
  Lock globalLock;
  Atom<int> globalExclusiveCount(0);
  Atom<int> globalUpgradeCount(0);
  Atom<int> globalSharedCount(0);

  Atom<bool> go(false);

  // clang crashes on access to Atom<> captured by ref in closure
  Atom<int>* globalExclusiveCountPtr = &globalExclusiveCount;
  Atom<int>* globalUpgradeCountPtr = &globalUpgradeCount;
  Atom<int>* globalSharedCountPtr = &globalSharedCount;
  Atom<bool>* goPtr = &go;

  vector<thread> threads(numThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < numThreads; ++t) {
      threads[t] = DSched::thread([&, t, numThreads] {
        std::minstd_rand engine;
        engine.seed(t);

        bool exclusive = false;
        bool upgrade = false;
        bool shared = false;
        bool ourGlobalTokenUsed = false;
        SharedMutexToken ourGlobalToken;

        Lock privateLock;
        vector<SharedMutexToken> privateTokens;

        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numThreads) {
          // randVal in [0,1000)
          long randVal = engine();
          randVal = (long)((randVal * (uint64_t)1000) / 0x7fffffff);

          // make as many assertions as possible about the global state
          if (exclusive) {
            EXPECT_EQ(1, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_EQ(0, globalUpgradeCountPtr->load(memory_order_acquire));
            EXPECT_EQ(0, globalSharedCountPtr->load(memory_order_acquire));
          }
          if (upgrade) {
            EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_EQ(1, globalUpgradeCountPtr->load(memory_order_acquire));
          }
          if (shared) {
            EXPECT_EQ(0, globalExclusiveCountPtr->load(memory_order_acquire));
            EXPECT_TRUE(globalSharedCountPtr->load(memory_order_acquire) > 0);
          } else {
            EXPECT_FALSE(ourGlobalTokenUsed);
          }

          // independent 20% chance we do something to the private lock
          if (randVal < 200) {
            // it's okay to take multiple private shared locks because
            // we never take an exclusive lock, so reader versus writer
            // priority doesn't cause deadlocks
            if (randVal < 100 && privateTokens.size() > 0) {
              auto i = randVal % privateTokens.size();
              privateLock.unlock_shared(privateTokens[i]);
              privateTokens.erase(privateTokens.begin() + i);
            } else {
              SharedMutexToken token;
              privateLock.lock_shared(token);
              privateTokens.push_back(token);
            }
            continue;
          }

          // if we've got a lock, the only thing we can do is release it
          // or transform it into a different kind of lock
          if (exclusive) {
            exclusive = false;
            --*globalExclusiveCountPtr;
            if (randVal < 500) {
              globalLock.unlock();
            } else if (randVal < 700) {
              globalLock.unlock_and_lock_shared();
              ++*globalSharedCountPtr;
              shared = true;
            } else if (randVal < 900) {
              globalLock.unlock_and_lock_shared(ourGlobalToken);
              ++*globalSharedCountPtr;
              shared = true;
              ourGlobalTokenUsed = true;
            } else {
              globalLock.unlock_and_lock_upgrade();
              ++*globalUpgradeCountPtr;
              upgrade = true;
            }
          } else if (upgrade) {
            upgrade = false;
            --*globalUpgradeCountPtr;
            if (randVal < 500) {
              globalLock.unlock_upgrade();
            } else if (randVal < 700) {
              globalLock.unlock_upgrade_and_lock_shared();
              ++*globalSharedCountPtr;
              shared = true;
            } else if (randVal < 900) {
              globalLock.unlock_upgrade_and_lock_shared(ourGlobalToken);
              ++*globalSharedCountPtr;
              shared = true;
              ourGlobalTokenUsed = true;
            } else {
              globalLock.unlock_upgrade_and_lock();
              ++*globalExclusiveCountPtr;
              exclusive = true;
            }
          } else if (shared) {
            shared = false;
            --*globalSharedCountPtr;
            if (ourGlobalTokenUsed) {
              globalLock.unlock_shared(ourGlobalToken);
              ourGlobalTokenUsed = false;
            } else {
              globalLock.unlock_shared();
            }
          } else if (randVal < 400) {
            // 40% chance of shared lock with token, 4 ways to get it

            // delta t goes from -1 millis to 7 millis
            auto dt = microseconds(10 * (randVal - 100));

            if (randVal < 400) {
              globalLock.lock_shared(ourGlobalToken);
              shared = true;
            } else if (randVal < 500) {
              shared = globalLock.try_lock_shared(ourGlobalToken);
            } else if (randVal < 600) {
              shared = globalLock.try_lock_shared_for(dt, ourGlobalToken);
            } else if (randVal < 800) {
              shared = globalLock.try_lock_shared_until(
                  system_clock::now() + dt, ourGlobalToken);
            }
            if (shared) {
              ourGlobalTokenUsed = true;
              ++*globalSharedCountPtr;
            }
          } else if (randVal < 800) {
            // 40% chance of shared lock without token
            auto dt = microseconds(10 * (randVal - 100));
            if (randVal < 400) {
              globalLock.lock_shared();
              shared = true;
            } else if (randVal < 500) {
              shared = globalLock.try_lock_shared();
            } else if (randVal < 600) {
              shared = globalLock.try_lock_shared_for(dt);
            } else if (randVal < 800) {
              shared =
                  globalLock.try_lock_shared_until(system_clock::now() + dt);
            }
            if (shared) {
              ++*globalSharedCountPtr;
            }
          } else if (randVal < 900) {
            // 10% chance of upgrade lock
            globalLock.lock_upgrade();
            upgrade = true;
            ++*globalUpgradeCountPtr;
          } else {
            // 10% chance of exclusive lock, 5 ways to get it

            // delta t goes from -1 millis to 9 millis
            auto dt = microseconds(100 * (randVal - 910));

            if (randVal < 400) {
              globalLock.lock();
              exclusive = true;
            } else if (randVal < 500) {
              exclusive = globalLock.try_lock();
            } else if (randVal < 600) {
              exclusive = globalLock.try_lock_for(dt);
            } else if (randVal < 700) {
              exclusive = globalLock.try_lock_until(steady_clock::now() + dt);
            } else {
              exclusive = globalLock.try_lock_until(system_clock::now() + dt);
            }
            if (exclusive) {
              ++*globalExclusiveCountPtr;
            }
          }
        }

        if (exclusive) {
          --*globalExclusiveCountPtr;
          globalLock.unlock();
        }
        if (upgrade) {
          --*globalUpgradeCountPtr;
          globalLock.unlock_upgrade();
        }
        if (shared) {
          --*globalSharedCountPtr;
          if (ourGlobalTokenUsed) {
            globalLock.unlock_shared(ourGlobalToken);
            ourGlobalTokenUsed = false;
          } else {
            globalLock.unlock_shared();
          }
        }
        for (auto& token : privateTokens) {
          privateLock.unlock_shared(token);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}

TEST(SharedMutex, deterministicConcurrentReadersOfOneLockReadPrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic, DSharedMutexReadPriority, Locker>(
        1000, 3, false);
  }
}

TEST(SharedMutex, deterministicConcurrentReadersOfOneLockWritePrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic, DSharedMutexWritePriority, Locker>(
        1000, 3, false);
  }
}

TEST(SharedMutex, concurrentReadersOfOneLockReadPrio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexReadPriority, Locker>(
        100000, 32, false);
  }
}

TEST(SharedMutex, concurrentReadersOfOneLockWritePrio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexWritePriority, Locker>(
        100000, 32, false);
  }
}

TEST(SharedMutex, deterministicReadersOfConcurrentLocksReadPrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic, DSharedMutexReadPriority, Locker>(
        1000, 3, true);
  }
}

TEST(SharedMutex, deterministicReadersOfConcurrentLocksWritePrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runContendedReaders<DeterministicAtomic, DSharedMutexWritePriority, Locker>(
        1000, 3, true);
  }
}

TEST(SharedMutex, readersOfConcurrentLocksReadPrio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexReadPriority, TokenLocker>(
        100000, 32, true);
  }
}

TEST(SharedMutex, readersOfConcurrentLocksWritePrio) {
  for (int pass = 0; pass < 10; ++pass) {
    runContendedReaders<atomic, SharedMutexWritePriority, TokenLocker>(
        100000, 32, true);
  }
}

TEST(SharedMutex, deterministicMixedMostlyReadReadPrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexReadPriority, Locker>(
        1000, 3, 0.1, false);
  }
}

TEST(SharedMutex, deterministicMixedMostlyReadWritePrio) {
  for (int pass = 0; pass < 3; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, Locker>(
        1000, 3, 0.1, false);
  }
}

TEST(SharedMutex, mixedMostlyReadReadPrio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
        10000, 32, 0.1, false);
  }
}

TEST(SharedMutex, mixedMostlyReadWritePrio) {
  for (int pass = 0; pass < 5; ++pass) {
    runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
        10000, 32, 0.1, false);
  }
}

TEST(SharedMutex, deterministicMixedMostlyWriteReadPrio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexReadPriority, TokenLocker>(
        1000, 10, 0.9, false);
  }
}

TEST(SharedMutex, deterministicMixedMostlyWriteWritePrio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
        1000, 10, 0.9, false);
  }
}

TEST(SharedMutex, deterministicLostWakeupWritePrio) {
  for (int pass = 0; pass < 10; ++pass) {
    DSched sched(DSched::uniformSubset(pass, 2, 200));
    runMixed<DeterministicAtomic, DSharedMutexWritePriority, TokenLocker>(
        1000, 3, 1.0, false);
  }
}

// In TSAN, tests run a lot slower. To avoid test timeouts, scale down the
// number of repetitions the tests use.
static std::size_t adjustReps(std::size_t reps) {
  if (folly::kIsSanitizeThread) {
    return reps / 10;
  }
  return reps;
}

TEST(SharedMutex, mixedMostlyWriteReadPrio) {
  for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 5); ++pass) {
    runMixed<atomic, SharedMutexReadPriority, TokenLocker>(
        adjustReps(50000), adjustReps(300), 0.9, false);
  }
}

TEST(SharedMutex, mixedMostlyWriteWritePrio) {
  for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 5); ++pass) {
    runMixed<atomic, SharedMutexWritePriority, TokenLocker>(
        adjustReps(50000), adjustReps(300), 0.9, false);
  }
}

TEST(SharedMutex, deterministicAllOpsReadPrio) {
  for (int pass = 0; pass < 5; ++pass) {
    DSched sched(DSched::uniform(pass));
    runAllAndValidate<DSharedMutexReadPriority, DeterministicAtomic>(1000, 8);
  }
}

TEST(SharedMutex, deterministicAllOpsWritePrio) {
  // This test fails in TSAN because of noisy lock ordering inversions.
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 5; ++pass) {
    DSched sched(DSched::uniform(pass));
    runAllAndValidate<DSharedMutexWritePriority, DeterministicAtomic>(1000, 8);
  }
}

TEST(SharedMutex, allOpsReadPrio) {
  for (int pass = 0; pass < 5; ++pass) {
    runAllAndValidate<SharedMutexReadPriority, atomic>(100000, 32);
  }
}

TEST(SharedMutex, allOpsWritePrio) {
  // This test fails in TSAN because of noisy lock ordering inversions.
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 5; ++pass) {
    runAllAndValidate<SharedMutexWritePriority, atomic>(100000, 32);
  }
}

FOLLY_ASSUME_FBVECTOR_COMPATIBLE(std::optional<std::optional<SharedMutexToken>>)

// The setup is a set of sending threads that acquire a shared lock in one
// of several ways (directly; by taking an exclusive lock and downgrading
// it; or by taking an upgrade lock and either downgrading it or promoting
// it to exclusive and then downgrading), then enqueue the shared lock to a
// second set of threads that just performs the unlocks.  Half of the
// shared locks use tokens, the others don't.
template <typename Lock, template <typename> class Atom>
static void runRemoteUnlock(
    size_t numOps,
    double preWriteFraction,
    double preUpgradeFraction,
    size_t numSendingThreads,
    size_t numReceivingThreads) {
  Lock globalLock;
  MPMCQueue<std::optional<std::optional<SharedMutexToken>>, Atom> queue(10);
  auto queuePtr = &queue; // workaround for clang crash

  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  Atom<int> pendingSenders(numSendingThreads);
  auto pendingSendersPtr = &pendingSenders; // workaround for clang crash
  vector<thread> threads(numSendingThreads + numReceivingThreads);

  BENCHMARK_SUSPEND {
    for (size_t t = 0; t < threads.size(); ++t) {
      threads[t] = DSched::thread([&, t, numSendingThreads] {
        if (t >= numSendingThreads) {
          // we're a receiver
          typename decltype(queue)::value_type elem;
          while (true) {
            queuePtr->blockingRead(elem);
            if (!elem) {
              // EOF, pass the EOF token
              queuePtr->blockingWrite(std::move(elem));
              break;
            }
            if (*elem) {
              globalLock.unlock_shared(**elem);
            } else {
              globalLock.unlock_shared();
            }
          }
          return;
        }
        // else we're a sender

        std::minstd_rand engine;
        engine.seed(t);

        while (!goPtr->load()) {
          this_thread::yield();
        }
        for (size_t op = t; op < numOps; op += numSendingThreads) {
          long unscaledRandVal = engine();

          // randVal in [0,1]
          double randVal = ((double)unscaledRandVal) / 0x7fffffff;

          // extract a bit and rescale
          bool useToken = randVal >= 0.5;
          randVal = (randVal - (useToken ? 0.5 : 0.0)) * 2;

          std::optional<SharedMutexToken> maybeToken;

          if (useToken) {
            SharedMutexToken token;
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared(token);
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared(token);
            } else {
              globalLock.lock_shared(token);
            }
            maybeToken = token;
          } else {
            if (randVal < preWriteFraction) {
              globalLock.lock();
              globalLock.unlock_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction / 2) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock_shared();
            } else if (randVal < preWriteFraction + preUpgradeFraction) {
              globalLock.lock_upgrade();
              globalLock.unlock_upgrade_and_lock();
              globalLock.unlock_and_lock_shared();
            } else {
              globalLock.lock_shared();
            }
          }

          // blockingWrite is emplace-like, so this automatically adds
          // another level of wrapping
          queuePtr->blockingWrite(maybeToken);
        }
        if (--*pendingSendersPtr == 0) {
          queuePtr->blockingWrite(std::nullopt);
        }
      });
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
}

TEST(SharedMutex, deterministicRemoteWritePrio) {
  // This test fails in an assertion in the TSAN library because there are too
  // many mutexes
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runRemoteUnlock<DSharedMutexWritePriority, DeterministicAtomic>(
        500, 0.1, 0.1, 5, 5);
  }
}

TEST(SharedMutex, deterministicRemoteReadPrio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runRemoteUnlock<DSharedMutexReadPriority, DeterministicAtomic>(
        500, 0.1, 0.1, 5, 5);
  }
}

TEST(SharedMutex, remoteWritePrio) {
  // This test fails in an assertion in the TSAN library because there are too
  // many mutexes
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 10; ++pass) {
    runRemoteUnlock<SharedMutexWritePriority, atomic>(100000, 0.1, 0.1, 5, 5);
  }
}

TEST(SharedMutex, remoteReadPrio) {
  // This test fails in an assertion in the TSAN library because there are too
  // many mutexes
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < (folly::kIsSanitizeAddress ? 1 : 100); ++pass) {
    runRemoteUnlock<SharedMutexReadPriority, atomic>(100000, 0.1, 0.1, 5, 5);
  }
}

static void burn(size_t n) {
  for (size_t i = 0; i < n; ++i) {
    folly::doNotOptimizeAway(i);
  }
}

// Two threads and three locks, arranged so that they have to proceed
// in turn with reader/writer conflict
template <typename Lock, template <typename> class Atom = atomic>
static void runPingPong(size_t numRounds, size_t burnCount) {
  struct alignas(hardware_destructive_interference_size) PaddedLock {
    Lock lock_;
  };
  array<PaddedLock, 3> paddedLocks;
  Atom<int> avail(0);
  auto availPtr = &avail; // workaround for clang crash
  Atom<bool> go(false);
  auto goPtr = &go; // workaround for clang crash
  vector<thread> threads(2);

  paddedLocks[0].lock_.lock();
  paddedLocks[1].lock_.lock();
  paddedLocks[2].lock_.lock_shared();

  BENCHMARK_SUSPEND {
    threads[0] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      for (size_t i = 0; i < numRounds; ++i) {
        paddedLocks[i % 3].lock_.unlock();
        paddedLocks[(i + 2) % 3].lock_.lock();
        burn(burnCount);
      }
    });
    threads[1] = DSched::thread([&] {
      ++*availPtr;
      while (!goPtr->load()) {
        this_thread::yield();
      }
      for (size_t i = 0; i < numRounds; ++i) {
        paddedLocks[i % 3].lock_.lock_shared();
        burn(burnCount);
        paddedLocks[(i + 2) % 3].lock_.unlock_shared();
      }
    });

    while (avail.load() < 2) {
      this_thread::yield();
    }
  }

  go.store(true);
  for (auto& thr : threads) {
    DSched::join(thr);
  }
  paddedLocks[numRounds % 3].lock_.unlock();
  paddedLocks[(numRounds + 1) % 3].lock_.unlock();
  paddedLocks[(numRounds + 2) % 3].lock_.unlock_shared();
}

template <class F>
static void parallelRun(size_t numThreads, F f) {
  std::vector<thread> threads;
  for (size_t tid = 0; tid < numThreads; ++tid) {
    threads.emplace_back([tid, &f] { f(tid); });
  }
  for (auto& t : threads) {
    t.join();
  }
}

static void folly_rwspin_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<RWSpinLock>(n / scale, burnCount);
}

static void shmtx_w_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<SharedMutexWritePriority>(n / scale, burnCount);
}

static void shmtx_r_bare_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<SharedMutexReadPriority>(n / scale, burnCount);
}

static void folly_ticket_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<RWTicketSpinLock64>(n / scale, burnCount);
}

static void boost_shared_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<boost::shared_mutex>(n / scale, burnCount);
}

static void pthrd_rwlock_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<PosixRWLock>(n / scale, burnCount);
}

static void timed_wr_pri_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<fibers::TimedRWMutexWritePriority<fibers::Baton>>(
      n / scale, burnCount);
}

static void timed_rd_pri_ping_pong(size_t n, size_t scale, size_t burnCount) {
  runPingPong<fibers::TimedRWMutexReadPriority<fibers::Baton>>(
      n / scale, burnCount);
}

TEST(SharedMutex, deterministicPingPongWritePrio) {
  // This test fails in TSAN because some mutexes are locked via
  // lock_shared() in one thread and released via unlock_shared() in a
  // different thread.
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runPingPong<DSharedMutexWritePriority, DeterministicAtomic>(500, 0);
  }
}

TEST(SharedMutex, deterministicPingPongReadPrio) {
  for (int pass = 0; pass < 1; ++pass) {
    DSched sched(DSched::uniform(pass));
    runPingPong<DSharedMutexReadPriority, DeterministicAtomic>(500, 0);
  }
}

TEST(SharedMutex, pingPongWritePrio) {
  // This test fails in TSAN because some mutexes are locked via
  // lock_shared() in one thread and released via unlock_shared() in a
  // different thread.
  SKIP_IF(folly::kIsSanitizeThread);
  for (int pass = 0; pass < 1; ++pass) {
    runPingPong<SharedMutexWritePriority, atomic>(50000, 0);
  }
}

TEST(SharedMutex, pingPongReadPrio) {
  for (int pass = 0; pass < 1; ++pass) {
    runPingPong<SharedMutexReadPriority, atomic>(50000, 0);
  }
}

TEST(SharedMutex, releaseToken) {
  {
    SharedMutex mutex;
    // Ensure sufficient contention that we get deferred locks.
    parallelRun(8, [&](size_t) {
      for (size_t i = 0; i < 100; ++i) {
        SharedMutexToken token;
        mutex.lock_shared(token);
        /* sleep override */ std::this_thread::sleep_for(1ms);
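        // release_token() gives up the token bookkeeping for this shared
        // hold, so it must then be released with the token-less
        // unlock_shared() below.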
        mutex.release_token(token);
        mutex.unlock_shared();
      }
    });
  }
  {
    SharedMutex mutex;
    parallelRun(8, [&](size_t) {
      for (size_t i = 0; i < 100; ++i) {
        std::shared_lock lock{mutex};
        /* sleep override */ std::this_thread::sleep_for(1ms);
        lock.release();
        mutex.unlock_shared();
      }
    });
  }
}

TEST(SharedMutex, sharedLockBasic) {
  SharedMutex mutex;

  std::shared_lock shared_lock{mutex};
  EXPECT_TRUE(shared_lock.owns_lock());
  EXPECT_EQ(&mutex, shared_lock.mutex());

  shared_lock.unlock();
  EXPECT_FALSE(shared_lock.owns_lock());
  EXPECT_EQ(&mutex, shared_lock.mutex());

  std::unique_lock unique_lock{mutex};
  EXPECT_TRUE(unique_lock.owns_lock());
}

TEST(SharedMutex, sharedLockDefaultConstructor) {
  std::shared_lock<SharedMutex> lock;
  EXPECT_FALSE(lock.owns_lock());
  EXPECT_EQ(nullptr, lock.mutex());
  EXPECT_EQ(lock.release(), nullptr);
}

TEST(SharedMutex, sharedLockMutexConstructor) {
  SharedMutex mutex;
  std::shared_lock lock{mutex};

  EXPECT_TRUE(lock.owns_lock());
  EXPECT_EQ(&mutex, lock.mutex());
}

TEST(SharedMutex, sharedLockDeferConstructor) {
  SharedMutex mutex;
  std::shared_lock lock{mutex, std::defer_lock};

  EXPECT_FALSE(lock.owns_lock());
  EXPECT_EQ(&mutex, lock.mutex());

  EXPECT_EQ(&mutex, lock.release());
}

TEST(SharedMutex, sharedLockTryToConstructor) {
  SharedMutex mutex;

  {
    std::shared_lock lock{mutex, std::try_to_lock};

    EXPECT_TRUE(lock.owns_lock());
    EXPECT_EQ(&mutex, lock.mutex());
  }

  {
    std::unique_lock unique{mutex};
    EXPECT_TRUE(unique.owns_lock());

    std::shared_lock shared{mutex, std::try_to_lock};

    EXPECT_FALSE(shared.owns_lock());
    EXPECT_EQ(&mutex, shared.mutex());
  }
}

TEST(SharedMutex, sharedLockAdoptConstructor) {
  SharedMutex mutex;

  {
    mutex.lock_shared();
    std::shared_lock lock{mutex, std::adopt_lock};

    EXPECT_TRUE(lock.owns_lock());
    EXPECT_EQ(&mutex, lock.mutex());
  }

  {
    SharedMutexToken token;
    mutex.lock_shared(token);
    std::shared_lock lock{mutex, std::adopt_lock};

    EXPECT_TRUE(lock.owns_lock());
    EXPECT_EQ(&mutex, lock.mutex());
  }
}

TEST(SharedMutex, sharedLockDeadlineConstructor) {
  using namespace std::chrono_literals;
  const std::chrono::time_point<std::chrono::system_clock> deadline =
      std::chrono::system_clock::now() + 1ms;
  SharedMutex mutex;
  std::shared_lock lock{mutex, deadline};

  EXPECT_TRUE(lock.owns_lock());
  EXPECT_EQ(&mutex, lock.mutex());
}

TEST(SharedMutex, sharedLockTimeoutConstructor) {
  using namespace std::chrono_literals;
  SharedMutex mutex;
  std::shared_lock lock{mutex, /* timeout = */ 1ms};

  EXPECT_TRUE(lock.owns_lock());
  EXPECT_EQ(&mutex, lock.mutex());
}

TEST(SharedMutex, sharedLockDoubleUnlock) {
  SharedMutex mutex;
  std::shared_lock lock{mutex};
  lock.unlock();

  EXPECT_THROW(lock.unlock(), std::system_error);
}

TEST(SharedMutex, sharedLockLockNoMutex) {
  std::shared_lock<SharedMutex> lock;

  EXPECT_THROW(lock.lock(), std::system_error);
}

TEST(SharedMutex, sharedLockLockAlreadyHeld) {
  SharedMutex mutex;
  std::shared_lock lock{mutex};

  EXPECT_TRUE(lock.owns_lock());
  EXPECT_THROW(lock.lock(), std::system_error);
}

// This is here so you can tell how much of the runtime reported by the
// more complex harnesses is due to the harness itself, although thanks to
// the magic of compiler optimization it may also come out slower.
BENCHMARK(single_thread_lock_shared_unlock_shared, iters) {
  SharedMutex lock;
  for (size_t n = 0; n < iters; ++n) {
    SharedMutex::Token token;
    lock.lock_shared(token);
    folly::doNotOptimizeAway(0);
    lock.unlock_shared(token);
  }
}

BENCHMARK(single_thread_lock_unlock, iters) {
  SharedMutex lock;
  for (size_t n = 0; n < iters; ++n) {
    lock.lock();
    folly::doNotOptimizeAway(0);
    lock.unlock();
  }
}

#define BENCH_BASE(...) FB_VA_GLUE(BENCHMARK_NAMED_PARAM, (__VA_ARGS__))
#define BENCH_REL(...) FB_VA_GLUE(BENCHMARK_RELATIVE_NAMED_PARAM, (__VA_ARGS__))

// 100% reads.  Best-case scenario for deferred locks.  The lock is
// colocated with the read data, so the inline lock takes a cache miss on
// every access, while the deferred lock gets only cache hits and local
// access.
BENCHMARK_DRAW_LINE();
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 1thread, 1, false)
BENCH_REL(shmtx_wr_pri_reads, 1thread, 1, false)
BENCH_REL(shmtx_w_bare_reads, 1thread, 1, false)
BENCH_REL(shmtx_rd_pri_reads, 1thread, 1, false)
BENCH_REL(shmtx_r_bare_reads, 1thread, 1, false)
BENCH_REL(folly_ticket_reads, 1thread, 1, false)
BENCH_REL(boost_shared_reads, 1thread, 1, false)
BENCH_REL(pthrd_rwlock_reads, 1thread, 1, false)
BENCH_REL(timed_wr_pri_reads, 1thread, 1, false)
BENCH_REL(timed_rd_pri_reads, 1thread, 1, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 2thread, 2, false)
BENCH_REL(shmtx_wr_pri_reads, 2thread, 2, false)
BENCH_REL(shmtx_w_bare_reads, 2thread, 2, false)
BENCH_REL(shmtx_rd_pri_reads, 2thread, 2, false)
BENCH_REL(shmtx_r_bare_reads, 2thread, 2, false)
BENCH_REL(folly_ticket_reads, 2thread, 2, false)
BENCH_REL(boost_shared_reads, 2thread, 2, false)
BENCH_REL(pthrd_rwlock_reads, 2thread, 2, false)
BENCH_REL(timed_wr_pri_reads, 2thread, 2, false)
BENCH_REL(timed_rd_pri_reads, 2thread, 2, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 4thread, 4, false)
BENCH_REL(shmtx_wr_pri_reads, 4thread, 4, false)
BENCH_REL(shmtx_w_bare_reads, 4thread, 4, false)
BENCH_REL(shmtx_rd_pri_reads, 4thread, 4, false)
BENCH_REL(shmtx_r_bare_reads, 4thread, 4, false)
BENCH_REL(folly_ticket_reads, 4thread, 4, false)
BENCH_REL(boost_shared_reads, 4thread, 4, false)
BENCH_REL(pthrd_rwlock_reads, 4thread, 4, false)
BENCH_REL(timed_wr_pri_reads, 4thread, 4, false)
BENCH_REL(timed_rd_pri_reads, 4thread, 4, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 8thread, 8, false)
BENCH_REL(shmtx_wr_pri_reads, 8thread, 8, false)
BENCH_REL(shmtx_w_bare_reads, 8thread, 8, false)
BENCH_REL(shmtx_rd_pri_reads, 8thread, 8, false)
BENCH_REL(shmtx_r_bare_reads, 8thread, 8, false)
BENCH_REL(folly_ticket_reads, 8thread, 8, false)
BENCH_REL(boost_shared_reads, 8thread, 8, false)
BENCH_REL(pthrd_rwlock_reads, 8thread, 8, false)
BENCH_REL(timed_wr_pri_reads, 8thread, 8, false)
BENCH_REL(timed_rd_pri_reads, 8thread, 8, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 16thread, 16, false)
BENCH_REL(shmtx_wr_pri_reads, 16thread, 16, false)
BENCH_REL(shmtx_w_bare_reads, 16thread, 16, false)
BENCH_REL(shmtx_rd_pri_reads, 16thread, 16, false)
BENCH_REL(shmtx_r_bare_reads, 16thread, 16, false)
BENCH_REL(folly_ticket_reads, 16thread, 16, false)
BENCH_REL(boost_shared_reads, 16thread, 16, false)
BENCH_REL(pthrd_rwlock_reads, 16thread, 16, false)
BENCH_REL(timed_wr_pri_reads, 16thread, 16, false)
BENCH_REL(timed_rd_pri_reads, 16thread, 16, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 32thread, 32, false)
BENCH_REL(shmtx_wr_pri_reads, 32thread, 32, false)
BENCH_REL(shmtx_w_bare_reads, 32thread, 32, false)
BENCH_REL(shmtx_rd_pri_reads, 32thread, 32, false)
BENCH_REL(shmtx_r_bare_reads, 32thread, 32, false)
BENCH_REL(folly_ticket_reads, 32thread, 32, false)
BENCH_REL(boost_shared_reads, 32thread, 32, false)
BENCH_REL(pthrd_rwlock_reads, 32thread, 32, false)
BENCH_REL(timed_wr_pri_reads, 32thread, 32, false)
BENCH_REL(timed_rd_pri_reads, 32thread, 32, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_reads, 64thread, 64, false)
BENCH_REL(shmtx_wr_pri_reads, 64thread, 64, false)
BENCH_REL(shmtx_w_bare_reads, 64thread, 64, false)
BENCH_REL(shmtx_rd_pri_reads, 64thread, 64, false)
BENCH_REL(shmtx_r_bare_reads, 64thread, 64, false)
BENCH_REL(folly_ticket_reads, 64thread, 64, false)
BENCH_REL(boost_shared_reads, 64thread, 64, false)
BENCH_REL(pthrd_rwlock_reads, 64thread, 64, false)
BENCH_REL(timed_wr_pri_reads, 64thread, 64, false)
BENCH_REL(timed_rd_pri_reads, 64thread, 64, false)

// 1 lock used by everybody, 100% writes.  Adding threads only hurts, but
// it is good not to fail catastrophically.  Compare to
// single_thread_lock_unlock to see the overhead of the generic driver (and
// its pseudo-random number generator).  pthrd_mutex_ is a pthread_mutex_t
// (default, not adaptive), which is better than any of the reader-writer
// locks for this scenario.
BENCHMARK_DRAW_LINE();
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 1thread_all_write, 1, 1.0, false)
BENCH_REL(shmtx_wr_pri, 1thread_all_write, 1, 1.0, false)
BENCH_REL(shmtx_rd_pri, 1thread_all_write, 1, 1.0, false)
BENCH_REL(folly_ticket, 1thread_all_write, 1, 1.0, false)
BENCH_REL(boost_shared, 1thread_all_write, 1, 1.0, false)
BENCH_REL(pthrd_rwlock, 1thread_all_write, 1, 1.0, false)
BENCH_REL(pthrd_mutex_, 1thread_all_write, 1, 1.0, false)
BENCH_REL(timed_wr_pri, 1thread_all_write, 1, 1.0, false)
BENCH_REL(timed_rd_pri, 1thread_all_write, 1, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thread_all_write, 2, 1.0, false)
BENCH_REL(shmtx_wr_pri, 2thread_all_write, 2, 1.0, false)
BENCH_REL(shmtx_rd_pri, 2thread_all_write, 2, 1.0, false)
BENCH_REL(folly_ticket, 2thread_all_write, 2, 1.0, false)
BENCH_REL(boost_shared, 2thread_all_write, 2, 1.0, false)
BENCH_REL(pthrd_rwlock, 2thread_all_write, 2, 1.0, false)
BENCH_REL(pthrd_mutex_, 2thread_all_write, 2, 1.0, false)
BENCH_REL(timed_wr_pri, 2thread_all_write, 2, 1.0, false)
BENCH_REL(timed_rd_pri, 2thread_all_write, 2, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 4thread_all_write, 4, 1.0, false)
BENCH_REL(shmtx_wr_pri, 4thread_all_write, 4, 1.0, false)
BENCH_REL(shmtx_rd_pri, 4thread_all_write, 4, 1.0, false)
BENCH_REL(folly_ticket, 4thread_all_write, 4, 1.0, false)
BENCH_REL(boost_shared, 4thread_all_write, 4, 1.0, false)
BENCH_REL(pthrd_rwlock, 4thread_all_write, 4, 1.0, false)
BENCH_REL(pthrd_mutex_, 4thread_all_write, 4, 1.0, false)
BENCH_REL(timed_wr_pri, 4thread_all_write, 4, 1.0, false)
BENCH_REL(timed_rd_pri, 4thread_all_write, 4, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 8thread_all_write, 8, 1.0, false)
BENCH_REL(shmtx_wr_pri, 8thread_all_write, 8, 1.0, false)
BENCH_REL(shmtx_rd_pri, 8thread_all_write, 8, 1.0, false)
BENCH_REL(folly_ticket, 8thread_all_write, 8, 1.0, false)
BENCH_REL(boost_shared, 8thread_all_write, 8, 1.0, false)
BENCH_REL(pthrd_rwlock, 8thread_all_write, 8, 1.0, false)
BENCH_REL(pthrd_mutex_, 8thread_all_write, 8, 1.0, false)
BENCH_REL(timed_wr_pri, 8thread_all_write, 8, 1.0, false)
BENCH_REL(timed_rd_pri, 8thread_all_write, 8, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 16thread_all_write, 16, 1.0, false)
BENCH_REL(shmtx_wr_pri, 16thread_all_write, 16, 1.0, false)
BENCH_REL(shmtx_rd_pri, 16thread_all_write, 16, 1.0, false)
BENCH_REL(folly_ticket, 16thread_all_write, 16, 1.0, false)
BENCH_REL(boost_shared, 16thread_all_write, 16, 1.0, false)
BENCH_REL(pthrd_rwlock, 16thread_all_write, 16, 1.0, false)
BENCH_REL(pthrd_mutex_, 16thread_all_write, 16, 1.0, false)
BENCH_REL(timed_wr_pri, 16thread_all_write, 16, 1.0, false)
BENCH_REL(timed_rd_pri, 16thread_all_write, 16, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 32thread_all_write, 32, 1.0, false)
BENCH_REL(shmtx_wr_pri, 32thread_all_write, 32, 1.0, false)
BENCH_REL(shmtx_rd_pri, 32thread_all_write, 32, 1.0, false)
BENCH_REL(folly_ticket, 32thread_all_write, 32, 1.0, false)
BENCH_REL(boost_shared, 32thread_all_write, 32, 1.0, false)
BENCH_REL(pthrd_rwlock, 32thread_all_write, 32, 1.0, false)
BENCH_REL(pthrd_mutex_, 32thread_all_write, 32, 1.0, false)
BENCH_REL(timed_wr_pri, 32thread_all_write, 32, 1.0, false)
BENCH_REL(timed_rd_pri, 32thread_all_write, 32, 1.0, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 64thread_all_write, 64, 1.0, false)
BENCH_REL(shmtx_wr_pri, 64thread_all_write, 64, 1.0, false)
BENCH_REL(shmtx_rd_pri, 64thread_all_write, 64, 1.0, false)
BENCH_REL(folly_ticket, 64thread_all_write, 64, 1.0, false)
BENCH_REL(boost_shared, 64thread_all_write, 64, 1.0, false)
BENCH_REL(pthrd_rwlock, 64thread_all_write, 64, 1.0, false)
BENCH_REL(pthrd_mutex_, 64thread_all_write, 64, 1.0, false)
BENCH_REL(timed_wr_pri, 64thread_all_write, 64, 1.0, false)
BENCH_REL(timed_rd_pri, 64thread_all_write, 64, 1.0, false)

// 1 lock used by everybody, 10% writes.  Not much scaling to be had.  Perf
// is best at 1 thread; once multiple threads are involved, going beyond 8
// threads hurts.
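//
// A hedged sketch of how the 0.10 write-fraction argument plays out per
// iteration (the PRNG choice and threshold test are assumptions, not the
// driver's exact code):
//
//   std::minstd_rand rng(12345);
//   std::uniform_real_distribution<double> dist(0.0, 1.0);
//   bool isWrite = dist(rng) < 0.10;  // ~1 op in 10 takes the write path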
BENCHMARK_DRAW_LINE();
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(shmtx_wr_pri, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(shmtx_rd_pri, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(folly_ticket, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(boost_shared, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(pthrd_rwlock, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(timed_wr_pri, 1thread_10pct_write, 1, 0.10, false)
BENCH_REL(timed_rd_pri, 1thread_10pct_write, 1, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(shmtx_wr_pri, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(shmtx_rd_pri, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(folly_ticket, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(boost_shared, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(pthrd_rwlock, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(timed_wr_pri, 2thread_10pct_write, 2, 0.10, false)
BENCH_REL(timed_rd_pri, 2thread_10pct_write, 2, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(shmtx_wr_pri, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(shmtx_rd_pri, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(folly_ticket, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(boost_shared, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(pthrd_rwlock, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(timed_wr_pri, 4thread_10pct_write, 4, 0.10, false)
BENCH_REL(timed_rd_pri, 4thread_10pct_write, 4, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(shmtx_wr_pri, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(shmtx_rd_pri, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(folly_ticket, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(boost_shared, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(pthrd_rwlock, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(timed_wr_pri, 8thread_10pct_write, 8, 0.10, false)
BENCH_REL(timed_rd_pri, 8thread_10pct_write, 8, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(shmtx_wr_pri, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(shmtx_rd_pri, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(folly_ticket, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(boost_shared, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(pthrd_rwlock, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(timed_wr_pri, 16thread_10pct_write, 16, 0.10, false)
BENCH_REL(timed_rd_pri, 16thread_10pct_write, 16, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(shmtx_wr_pri, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(shmtx_rd_pri, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(folly_ticket, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(boost_shared, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(pthrd_rwlock, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(timed_wr_pri, 32thread_10pct_write, 32, 0.10, false)
BENCH_REL(timed_rd_pri, 32thread_10pct_write, 32, 0.10, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(shmtx_wr_pri, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(shmtx_rd_pri, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(folly_ticket, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(boost_shared, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(pthrd_rwlock, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(timed_wr_pri, 64thread_10pct_write, 64, 0.10, false)
BENCH_REL(timed_rd_pri, 64thread_10pct_write, 64, 0.10, false)

// 1 lock used by everybody, 1% writes.  This is a more realistic example
// than the concurrent_*_reads benchmark, but it still shows the SharedMutex
// locks winning over all of the others.
BENCHMARK_DRAW_LINE();
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(shmtx_wr_pri, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(shmtx_w_bare, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(shmtx_rd_pri, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(shmtx_r_bare, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(folly_ticket, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(boost_shared, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(pthrd_rwlock, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(timed_wr_pri, 1thread_1pct_write, 1, 0.01, false)
BENCH_REL(timed_rd_pri, 1thread_1pct_write, 1, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(shmtx_wr_pri, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(shmtx_w_bare, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(shmtx_rd_pri, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(shmtx_r_bare, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(folly_ticket, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(boost_shared, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(pthrd_rwlock, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(timed_wr_pri, 2thread_1pct_write, 2, 0.01, false)
BENCH_REL(timed_rd_pri, 2thread_1pct_write, 2, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(shmtx_wr_pri, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(shmtx_w_bare, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(shmtx_rd_pri, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(shmtx_r_bare, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(folly_ticket, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(boost_shared, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(pthrd_rwlock, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(timed_wr_pri, 4thread_1pct_write, 4, 0.01, false)
BENCH_REL(timed_rd_pri, 4thread_1pct_write, 4, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(shmtx_wr_pri, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(shmtx_w_bare, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(shmtx_rd_pri, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(shmtx_r_bare, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(folly_ticket, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(boost_shared, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(pthrd_rwlock, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(timed_wr_pri, 8thread_1pct_write, 8, 0.01, false)
BENCH_REL(timed_rd_pri, 8thread_1pct_write, 8, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(shmtx_wr_pri, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(shmtx_w_bare, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(shmtx_rd_pri, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(shmtx_r_bare, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(folly_ticket, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(boost_shared, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(pthrd_rwlock, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(timed_wr_pri, 16thread_1pct_write, 16, 0.01, false)
BENCH_REL(timed_rd_pri, 16thread_1pct_write, 16, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(shmtx_wr_pri, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(shmtx_w_bare, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(shmtx_rd_pri, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(shmtx_r_bare, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(folly_ticket, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(boost_shared, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(pthrd_rwlock, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(timed_wr_pri, 32thread_1pct_write, 32, 0.01, false)
BENCH_REL(timed_rd_pri, 32thread_1pct_write, 32, 0.01, false)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(shmtx_wr_pri, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(shmtx_w_bare, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(shmtx_rd_pri, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(shmtx_r_bare, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(folly_ticket, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(boost_shared, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(pthrd_rwlock, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(timed_wr_pri, 64thread_1pct_write, 64, 0.01, false)
BENCH_REL(timed_rd_pri, 64thread_1pct_write, 64, 0.01, false)

// Worst case scenario for deferred locks.  There is no actual sharing, so
// read operations will likely have to first set the
// kDeferredReadersPossibleBit, and writers will likely have to scan
// deferredReaders[].
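//
// A hedged sketch of the separate-locks setup that the trailing `true`
// argument below selects (array name and indexing are assumptions, not the
// actual driver):
//
//   folly::SharedMutex locks[64];  // one lock per thread, never shared
//   // Thread i only ever touches locks[i], so every lock_shared() is
//   // uncontended, yet it may still occupy a deferred-reader slot that a
//   // later lock() on the same mutex has to scan for.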
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL(shmtx_wr_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_REL(shmtx_rd_pri, 2thr_2lock_50pct_write, 2, 0.50, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL(shmtx_wr_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_REL(shmtx_rd_pri, 4thr_4lock_50pct_write, 4, 0.50, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL(shmtx_wr_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_REL(shmtx_rd_pri, 8thr_8lock_50pct_write, 8, 0.50, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL(shmtx_wr_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_REL(shmtx_rd_pri, 16thr_16lock_50pct_write, 16, 0.50, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL(shmtx_wr_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_REL(shmtx_rd_pri, 32thr_32lock_50pct_write, 32, 0.50, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL(shmtx_wr_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCH_REL(shmtx_rd_pri, 64thr_64lock_50pct_write, 64, 0.50, true)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL(shmtx_wr_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_REL(shmtx_rd_pri, 2thr_2lock_10pct_write, 2, 0.10, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL(shmtx_wr_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_REL(shmtx_rd_pri, 4thr_4lock_10pct_write, 4, 0.10, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL(shmtx_wr_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_REL(shmtx_rd_pri, 8thr_8lock_10pct_write, 8, 0.10, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL(shmtx_wr_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_REL(shmtx_rd_pri, 16thr_16lock_10pct_write, 16, 0.10, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL(shmtx_wr_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_REL(shmtx_rd_pri, 32thr_32lock_10pct_write, 32, 0.10, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL(shmtx_wr_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCH_REL(shmtx_rd_pri, 64thr_64lock_10pct_write, 64, 0.10, true)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL(shmtx_wr_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_REL(shmtx_rd_pri, 2thr_2lock_1pct_write, 2, 0.01, true)
BENCH_BASE(folly_rwspin, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_REL(shmtx_wr_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_REL(shmtx_rd_pri, 4thr_4lock_1pct_write, 4, 0.01, true)
BENCH_BASE(folly_rwspin, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_REL(shmtx_wr_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_REL(shmtx_rd_pri, 8thr_8lock_1pct_write, 8, 0.01, true)
BENCH_BASE(folly_rwspin, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_REL(shmtx_wr_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_REL(shmtx_rd_pri, 16thr_16lock_1pct_write, 16, 0.01, true)
BENCH_BASE(folly_rwspin, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_REL(shmtx_wr_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_REL(shmtx_rd_pri, 32thr_32lock_1pct_write, 32, 0.01, true)
BENCH_BASE(folly_rwspin, 64thr_64lock_1pct_write, 64, 0.01, true)
BENCH_REL(shmtx_wr_pri, 64thr_64lock_1pct_write, 64, 0.01, true)
BENCH_REL(shmtx_rd_pri, 64thr_64lock_1pct_write, 64, 0.01, true)

// Ping-pong tests have a scaled number of iterations, because their burn
// loop would make them too slow otherwise.  Ping-pong with a burn count of
// 100k or 300k shows the advantage of soft-spin, reducing the cost of
// each wakeup by about 20 usec.  (Take the benchmark-reported difference,
// ~400 nanos, multiply by the scale of 100, then divide by 2 because
// each round has two wakeups.)
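//
// Worked out with the rounded figures above (exact values vary by run):
//   ~400 ns/iter difference * 100 (iteration scale) = ~40 us per round
//   ~40 us per round / 2 wakeups per round          = ~20 us per wakeup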
BENCHMARK_DRAW_LINE();
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_ping_pong, burn0, 1, 0)
BENCH_REL(shmtx_w_bare_ping_pong, burn0, 1, 0)
BENCH_REL(shmtx_r_bare_ping_pong, burn0, 1, 0)
BENCH_REL(folly_ticket_ping_pong, burn0, 1, 0)
BENCH_REL(boost_shared_ping_pong, burn0, 1, 0)
BENCH_REL(pthrd_rwlock_ping_pong, burn0, 1, 0)
BENCH_REL(timed_wr_pri_ping_pong, burn0, 1, 0)
BENCH_REL(timed_rd_pri_ping_pong, burn0, 1, 0)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_ping_pong, burn100k, 100, 100000)
BENCH_REL(shmtx_w_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL(shmtx_r_bare_ping_pong, burn100k, 100, 100000)
BENCH_REL(folly_ticket_ping_pong, burn100k, 100, 100000)
BENCH_REL(boost_shared_ping_pong, burn100k, 100, 100000)
BENCH_REL(pthrd_rwlock_ping_pong, burn100k, 100, 100000)
BENCH_REL(timed_wr_pri_ping_pong, burn100k, 100, 100000)
BENCH_REL(timed_rd_pri_ping_pong, burn100k, 100, 100000)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_ping_pong, burn300k, 100, 300000)
BENCH_REL(shmtx_w_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL(shmtx_r_bare_ping_pong, burn300k, 100, 300000)
BENCH_REL(folly_ticket_ping_pong, burn300k, 100, 300000)
BENCH_REL(boost_shared_ping_pong, burn300k, 100, 300000)
BENCH_REL(pthrd_rwlock_ping_pong, burn300k, 100, 300000)
BENCH_REL(timed_wr_pri_ping_pong, burn300k, 100, 300000)
BENCH_REL(timed_rd_pri_ping_pong, burn300k, 100, 300000)
BENCHMARK_DRAW_LINE();
BENCH_BASE(folly_rwspin_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(shmtx_w_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(shmtx_r_bare_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(folly_ticket_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(boost_shared_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(pthrd_rwlock_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(timed_wr_pri_ping_pong, burn1M, 1000, 1000000)
BENCH_REL(timed_rd_pri_ping_pong, burn1M, 1000, 1000000)

// Reproduce with about 10 minutes of runtime and
//   sudo nice -n -20
//     shared_mutex_test --benchmark --bm_min_iters=1000000
//
// The comparisons use folly::RWSpinLock as the baseline, with the
// following row being the default SharedMutex (using *Holder or
// Token-ful methods).
//
// The following results were obtained on a 2-socket
// Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz.
//
// ============================================================================
// folly/test/SharedMutexTest.cpp                  relative  time/iter  iters/s
// ============================================================================
// single_thread_lock_shared_unlock_shared                     25.17ns   39.74M
// single_thread_lock_unlock                                   25.88ns   38.64M
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// folly_rwspin_reads(1thread)                                 15.16ns   65.95M
// shmtx_wr_pri_reads(1thread)                       69.18%    21.92ns   45.63M
// shmtx_w_bare_reads(1thread)                       56.07%    27.04ns   36.98M
// shmtx_rd_pri_reads(1thread)                       69.06%    21.95ns   45.55M
// shmtx_r_bare_reads(1thread)                       56.36%    26.90ns   37.17M
// folly_ticket_reads(1thread)                       57.56%    26.34ns   37.96M
// boost_shared_reads(1thread)                       10.55%   143.72ns    6.96M
// pthrd_rwlock_reads(1thread)                       39.61%    38.28ns   26.12M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(2thread)                                 45.05ns   22.20M
// shmtx_wr_pri_reads(2thread)                      379.98%    11.86ns   84.34M
// shmtx_w_bare_reads(2thread)                      319.27%    14.11ns   70.87M
// shmtx_rd_pri_reads(2thread)                      385.59%    11.68ns   85.59M
// shmtx_r_bare_reads(2thread)                      306.56%    14.70ns   68.04M
// folly_ticket_reads(2thread)                       61.07%    73.78ns   13.55M
// boost_shared_reads(2thread)                       13.54%   332.66ns    3.01M
// pthrd_rwlock_reads(2thread)                       34.22%   131.65ns    7.60M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(4thread)                                 62.19ns   16.08M
// shmtx_wr_pri_reads(4thread)                     1022.82%     6.08ns  164.48M
// shmtx_w_bare_reads(4thread)                      875.37%     7.10ns  140.76M
// shmtx_rd_pri_reads(4thread)                     1060.46%     5.86ns  170.53M
// shmtx_r_bare_reads(4thread)                      879.88%     7.07ns  141.49M
// folly_ticket_reads(4thread)                       64.62%    96.23ns   10.39M
// boost_shared_reads(4thread)                       14.86%   418.49ns    2.39M
// pthrd_rwlock_reads(4thread)                       25.01%   248.65ns    4.02M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(8thread)                                 64.09ns   15.60M
// shmtx_wr_pri_reads(8thread)                     2191.99%     2.92ns  342.03M
// shmtx_w_bare_reads(8thread)                     1804.92%     3.55ns  281.63M
// shmtx_rd_pri_reads(8thread)                     2194.60%     2.92ns  342.44M
// shmtx_r_bare_reads(8thread)                     1800.53%     3.56ns  280.95M
// folly_ticket_reads(8thread)                       54.90%   116.74ns    8.57M
// boost_shared_reads(8thread)                       18.25%   351.24ns    2.85M
// pthrd_rwlock_reads(8thread)                       28.19%   227.31ns    4.40M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(16thread)                                70.06ns   14.27M
// shmtx_wr_pri_reads(16thread)                    4970.09%     1.41ns  709.38M
// shmtx_w_bare_reads(16thread)                    4143.75%     1.69ns  591.44M
// shmtx_rd_pri_reads(16thread)                    5009.31%     1.40ns  714.98M
// shmtx_r_bare_reads(16thread)                    4067.36%     1.72ns  580.54M
// folly_ticket_reads(16thread)                      46.78%   149.77ns    6.68M
// boost_shared_reads(16thread)                      21.67%   323.37ns    3.09M
// pthrd_rwlock_reads(16thread)                      35.05%   199.90ns    5.00M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(32thread)                                58.83ns   17.00M
// shmtx_wr_pri_reads(32thread)                    5158.37%     1.14ns  876.79M
// shmtx_w_bare_reads(32thread)                    4246.03%     1.39ns  721.72M
// shmtx_rd_pri_reads(32thread)                    4845.97%     1.21ns  823.69M
// shmtx_r_bare_reads(32thread)                    4721.44%     1.25ns  802.52M
// folly_ticket_reads(32thread)                      28.40%   207.15ns    4.83M
// boost_shared_reads(32thread)                      17.08%   344.54ns    2.90M
// pthrd_rwlock_reads(32thread)                      30.01%   196.02ns    5.10M
// ----------------------------------------------------------------------------
// folly_rwspin_reads(64thread)                                59.19ns   16.89M
// shmtx_wr_pri_reads(64thread)                    3804.54%     1.56ns  642.76M
// shmtx_w_bare_reads(64thread)                    3625.06%     1.63ns  612.43M
// shmtx_rd_pri_reads(64thread)                    3418.19%     1.73ns  577.48M
// shmtx_r_bare_reads(64thread)                    3416.98%     1.73ns  577.28M
// folly_ticket_reads(64thread)                      30.53%   193.90ns    5.16M
// boost_shared_reads(64thread)                      18.59%   318.47ns    3.14M
// pthrd_rwlock_reads(64thread)                      31.35%   188.81ns    5.30M
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// folly_rwspin(1thread_all_write)                             23.77ns   42.06M
// shmtx_wr_pri(1thread_all_write)                   85.09%    27.94ns   35.79M
// shmtx_rd_pri(1thread_all_write)                   85.32%    27.87ns   35.89M
// folly_ticket(1thread_all_write)                   88.11%    26.98ns   37.06M
// boost_shared(1thread_all_write)                   16.49%   144.14ns    6.94M
// pthrd_rwlock(1thread_all_write)                   53.99%    44.04ns   22.71M
// pthrd_mutex_(1thread_all_write)                   86.05%    27.63ns   36.20M
// ----------------------------------------------------------------------------
// folly_rwspin(2thread_all_write)                             76.05ns   13.15M
// shmtx_wr_pri(2thread_all_write)                   60.67%   125.35ns    7.98M
// shmtx_rd_pri(2thread_all_write)                   60.36%   125.99ns    7.94M
// folly_ticket(2thread_all_write)                  129.10%    58.91ns   16.98M
// boost_shared(2thread_all_write)                   18.65%   407.74ns    2.45M
// pthrd_rwlock(2thread_all_write)                   40.90%   185.92ns    5.38M
// pthrd_mutex_(2thread_all_write)                  127.37%    59.71ns   16.75M
// ----------------------------------------------------------------------------
// folly_rwspin(4thread_all_write)                            207.17ns    4.83M
// shmtx_wr_pri(4thread_all_write)                  119.42%   173.49ns    5.76M
// shmtx_rd_pri(4thread_all_write)                  117.68%   176.05ns    5.68M
// folly_ticket(4thread_all_write)                  182.39%   113.59ns    8.80M
// boost_shared(4thread_all_write)                   11.98%     1.73us  578.46K
// pthrd_rwlock(4thread_all_write)                   27.50%   753.25ns    1.33M
// pthrd_mutex_(4thread_all_write)                  117.75%   175.95ns    5.68M
// ----------------------------------------------------------------------------
// folly_rwspin(8thread_all_write)                            326.50ns    3.06M
// shmtx_wr_pri(8thread_all_write)                  125.47%   260.22ns    3.84M
// shmtx_rd_pri(8thread_all_write)                  124.73%   261.76ns    3.82M
// folly_ticket(8thread_all_write)                  253.39%   128.85ns    7.76M
// boost_shared(8thread_all_write)                    6.36%     5.13us  194.87K
// pthrd_rwlock(8thread_all_write)                   38.54%   847.09ns    1.18M
// pthrd_mutex_(8thread_all_write)                  166.31%   196.32ns    5.09M
// ----------------------------------------------------------------------------
// folly_rwspin(16thread_all_write)                           729.89ns    1.37M
// shmtx_wr_pri(16thread_all_write)                 219.91%   331.91ns    3.01M
// shmtx_rd_pri(16thread_all_write)                 220.09%   331.62ns    3.02M
// folly_ticket(16thread_all_write)                 390.06%   187.12ns    5.34M
// boost_shared(16thread_all_write)                  10.27%     7.11us  140.72K
// pthrd_rwlock(16thread_all_write)                 113.90%   640.84ns    1.56M
// pthrd_mutex_(16thread_all_write)                 401.97%   181.58ns    5.51M
// ----------------------------------------------------------------------------
// folly_rwspin(32thread_all_write)                             1.55us  645.01K
// shmtx_wr_pri(32thread_all_write)                 415.05%   373.54ns    2.68M
// shmtx_rd_pri(32thread_all_write)                 258.45%   599.88ns    1.67M
// folly_ticket(32thread_all_write)                 525.40%   295.09ns    3.39M
// boost_shared(32thread_all_write)                  20.84%     7.44us  134.45K
// pthrd_rwlock(32thread_all_write)                 254.16%   610.00ns    1.64M
// pthrd_mutex_(32thread_all_write)                 852.51%   181.86ns    5.50M
// ----------------------------------------------------------------------------
// folly_rwspin(64thread_all_write)                             2.03us  492.00K
// shmtx_wr_pri(64thread_all_write)                 517.65%   392.64ns    2.55M
// shmtx_rd_pri(64thread_all_write)                 288.20%   705.24ns    1.42M
// folly_ticket(64thread_all_write)                 638.22%   318.47ns    3.14M
// boost_shared(64thread_all_write)                  27.56%     7.37us  135.61K
// pthrd_rwlock(64thread_all_write)                 326.75%   622.04ns    1.61M
// pthrd_mutex_(64thread_all_write)                1231.57%   165.04ns    6.06M
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// folly_rwspin(1thread_10pct_write)                           19.39ns   51.58M
// shmtx_wr_pri(1thread_10pct_write)                 93.87%    20.65ns   48.42M
// shmtx_rd_pri(1thread_10pct_write)                 93.60%    20.71ns   48.28M
// folly_ticket(1thread_10pct_write)                 73.75%    26.29ns   38.04M
// boost_shared(1thread_10pct_write)                 12.97%   149.53ns    6.69M
// pthrd_rwlock(1thread_10pct_write)                 44.15%    43.92ns   22.77M
// ----------------------------------------------------------------------------
// folly_rwspin(2thread_10pct_write)                          227.88ns    4.39M
// shmtx_wr_pri(2thread_10pct_write)                321.08%    70.98ns   14.09M
// shmtx_rd_pri(2thread_10pct_write)                280.65%    81.20ns   12.32M
// folly_ticket(2thread_10pct_write)                220.43%   103.38ns    9.67M
// boost_shared(2thread_10pct_write)                 58.78%   387.71ns    2.58M
// pthrd_rwlock(2thread_10pct_write)                112.68%   202.23ns    4.94M
// ----------------------------------------------------------------------------
// folly_rwspin(4thread_10pct_write)                          444.94ns    2.25M
// shmtx_wr_pri(4thread_10pct_write)                470.35%    94.60ns   10.57M
// shmtx_rd_pri(4thread_10pct_write)                349.08%   127.46ns    7.85M
// folly_ticket(4thread_10pct_write)                305.64%   145.58ns    6.87M
// boost_shared(4thread_10pct_write)                 44.43%     1.00us  998.57K
// pthrd_rwlock(4thread_10pct_write)                100.59%   442.31ns    2.26M
// ----------------------------------------------------------------------------
// folly_rwspin(8thread_10pct_write)                          424.67ns    2.35M
// shmtx_wr_pri(8thread_10pct_write)                337.53%   125.82ns    7.95M
// shmtx_rd_pri(8thread_10pct_write)                232.32%   182.79ns    5.47M
// folly_ticket(8thread_10pct_write)                206.59%   205.56ns    4.86M
// boost_shared(8thread_10pct_write)                 19.45%     2.18us  457.90K
// pthrd_rwlock(8thread_10pct_write)                 78.58%   540.42ns    1.85M
// ----------------------------------------------------------------------------
// folly_rwspin(16thread_10pct_write)                         727.04ns    1.38M
// shmtx_wr_pri(16thread_10pct_write)               400.60%   181.49ns    5.51M
// shmtx_rd_pri(16thread_10pct_write)               312.94%   232.33ns    4.30M
// folly_ticket(16thread_10pct_write)               283.67%   256.30ns    3.90M
// boost_shared(16thread_10pct_write)                15.87%     4.58us  218.32K
// pthrd_rwlock(16thread_10pct_write)               131.28%   553.82ns    1.81M
// ----------------------------------------------------------------------------
// folly_rwspin(32thread_10pct_write)                         810.61ns    1.23M
// shmtx_wr_pri(32thread_10pct_write)               429.61%   188.68ns    5.30M
// shmtx_rd_pri(32thread_10pct_write)               321.13%   252.42ns    3.96M
// folly_ticket(32thread_10pct_write)               247.65%   327.32ns    3.06M
// boost_shared(32thread_10pct_write)                 8.34%     9.71us  102.94K
// pthrd_rwlock(32thread_10pct_write)               144.28%   561.85ns    1.78M
// ----------------------------------------------------------------------------
// folly_rwspin(64thread_10pct_write)                           1.10us  912.30K
// shmtx_wr_pri(64thread_10pct_write)               486.68%   225.22ns    4.44M
// shmtx_rd_pri(64thread_10pct_write)               412.96%   265.43ns    3.77M
// folly_ticket(64thread_10pct_write)               280.23%   391.15ns    2.56M
// boost_shared(64thread_10pct_write)                 6.16%    17.79us   56.22K
// pthrd_rwlock(64thread_10pct_write)               198.81%   551.34ns    1.81M
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// folly_rwspin(1thread_1pct_write)                            19.02ns   52.57M
// shmtx_wr_pri(1thread_1pct_write)                  94.46%    20.14ns   49.66M
// shmtx_w_bare(1thread_1pct_write)                  76.60%    24.83ns   40.27M
// shmtx_rd_pri(1thread_1pct_write)                  93.83%    20.27ns   49.33M
// shmtx_r_bare(1thread_1pct_write)                  77.04%    24.69ns   40.50M
// folly_ticket(1thread_1pct_write)                  72.83%    26.12ns   38.29M
// boost_shared(1thread_1pct_write)                  12.48%   152.44ns    6.56M
// pthrd_rwlock(1thread_1pct_write)                  42.85%    44.39ns   22.53M
// ----------------------------------------------------------------------------
// folly_rwspin(2thread_1pct_write)                           110.63ns    9.04M
// shmtx_wr_pri(2thread_1pct_write)                 442.12%    25.02ns   39.96M
// shmtx_w_bare(2thread_1pct_write)                 374.65%    29.53ns   33.86M
// shmtx_rd_pri(2thread_1pct_write)                 371.08%    29.81ns   33.54M
// shmtx_r_bare(2thread_1pct_write)                 138.02%    80.15ns   12.48M
// folly_ticket(2thread_1pct_write)                 131.34%    84.23ns   11.87M
// boost_shared(2thread_1pct_write)                  30.35%   364.58ns    2.74M
// pthrd_rwlock(2thread_1pct_write)                  95.48%   115.87ns    8.63M
// ----------------------------------------------------------------------------
// folly_rwspin(4thread_1pct_write)                           140.62ns    7.11M
// shmtx_wr_pri(4thread_1pct_write)                 627.13%    22.42ns   44.60M
// shmtx_w_bare(4thread_1pct_write)                 552.94%    25.43ns   39.32M
// shmtx_rd_pri(4thread_1pct_write)                 226.06%    62.21ns   16.08M
// shmtx_r_bare(4thread_1pct_write)                  77.61%   181.19ns    5.52M
// folly_ticket(4thread_1pct_write)                 119.58%   117.60ns    8.50M
// boost_shared(4thread_1pct_write)                  25.36%   554.54ns    1.80M
// pthrd_rwlock(4thread_1pct_write)                  45.55%   308.72ns    3.24M
// ----------------------------------------------------------------------------
// folly_rwspin(8thread_1pct_write)                           166.23ns    6.02M
// shmtx_wr_pri(8thread_1pct_write)                 687.09%    24.19ns   41.33M
// shmtx_w_bare(8thread_1pct_write)                 611.80%    27.17ns   36.80M
// shmtx_rd_pri(8thread_1pct_write)                 140.37%   118.43ns    8.44M
// shmtx_r_bare(8thread_1pct_write)                  80.32%   206.97ns    4.83M
// folly_ticket(8thread_1pct_write)                 117.06%   142.01ns    7.04M
// boost_shared(8thread_1pct_write)                  22.29%   745.67ns    1.34M
// pthrd_rwlock(8thread_1pct_write)                  49.84%   333.55ns    3.00M
// ----------------------------------------------------------------------------
// folly_rwspin(16thread_1pct_write)                          419.79ns    2.38M
// shmtx_wr_pri(16thread_1pct_write)               1397.92%    30.03ns   33.30M
// shmtx_w_bare(16thread_1pct_write)               1324.60%    31.69ns   31.55M
// shmtx_rd_pri(16thread_1pct_write)                278.12%   150.94ns    6.63M
// shmtx_r_bare(16thread_1pct_write)                194.25%   216.11ns    4.63M
// folly_ticket(16thread_1pct_write)                255.38%   164.38ns    6.08M
// boost_shared(16thread_1pct_write)                 33.71%     1.25us  803.01K
// pthrd_rwlock(16thread_1pct_write)                131.96%   318.12ns    3.14M
// ----------------------------------------------------------------------------
// folly_rwspin(32thread_1pct_write)                          395.99ns    2.53M
// shmtx_wr_pri(32thread_1pct_write)               1332.76%    29.71ns   33.66M
// shmtx_w_bare(32thread_1pct_write)               1208.86%    32.76ns   30.53M
// shmtx_rd_pri(32thread_1pct_write)                252.97%   156.54ns    6.39M
// shmtx_r_bare(32thread_1pct_write)                193.79%   204.35ns    4.89M
// folly_ticket(32thread_1pct_write)                173.16%   228.69ns    4.37M
// boost_shared(32thread_1pct_write)                 17.00%     2.33us  429.40K
// pthrd_rwlock(32thread_1pct_write)                129.88%   304.89ns    3.28M
// ----------------------------------------------------------------------------
// folly_rwspin(64thread_1pct_write)                          424.07ns    2.36M
// shmtx_wr_pri(64thread_1pct_write)               1297.89%    32.67ns   30.61M
// shmtx_w_bare(64thread_1pct_write)               1228.88%    34.51ns   28.98M
// shmtx_rd_pri(64thread_1pct_write)                270.40%   156.83ns    6.38M
// shmtx_r_bare(64thread_1pct_write)                218.05%   194.48ns    5.14M
// folly_ticket(64thread_1pct_write)                171.44%   247.36ns    4.04M
// boost_shared(64thread_1pct_write)                 10.60%     4.00us  249.95K
// pthrd_rwlock(64thread_1pct_write)                143.80%   294.91ns    3.39M
// ----------------------------------------------------------------------------
// folly_rwspin(2thr_2lock_50pct_write)                        10.87ns   91.99M
// shmtx_wr_pri(2thr_2lock_50pct_write)              83.71%    12.99ns   77.01M
// shmtx_rd_pri(2thr_2lock_50pct_write)              84.08%    12.93ns   77.34M
// folly_rwspin(4thr_4lock_50pct_write)                         5.32ns  188.12M
// shmtx_wr_pri(4thr_4lock_50pct_write)              82.21%     6.47ns  154.65M
// shmtx_rd_pri(4thr_4lock_50pct_write)              81.20%     6.55ns  152.75M
// folly_rwspin(8thr_8lock_50pct_write)                         2.64ns  379.06M
// shmtx_wr_pri(8thr_8lock_50pct_write)              81.26%     3.25ns  308.03M
// shmtx_rd_pri(8thr_8lock_50pct_write)              80.95%     3.26ns  306.86M
// folly_rwspin(16thr_16lock_50pct_write)                       1.52ns  656.77M
// shmtx_wr_pri(16thr_16lock_50pct_write)            86.24%     1.77ns  566.41M
// shmtx_rd_pri(16thr_16lock_50pct_write)            83.72%     1.82ns  549.82M
// folly_rwspin(32thr_32lock_50pct_write)                       1.19ns  841.03M
// shmtx_wr_pri(32thr_32lock_50pct_write)            85.08%     1.40ns  715.55M
// shmtx_rd_pri(32thr_32lock_50pct_write)            86.44%     1.38ns  727.00M
// folly_rwspin(64thr_64lock_50pct_write)                       1.46ns  684.28M
// shmtx_wr_pri(64thr_64lock_50pct_write)            84.53%     1.73ns  578.43M
// shmtx_rd_pri(64thr_64lock_50pct_write)            82.80%     1.76ns  566.58M
// ----------------------------------------------------------------------------
// folly_rwspin(2thr_2lock_10pct_write)                        10.01ns   99.85M
// shmtx_wr_pri(2thr_2lock_10pct_write)              92.02%    10.88ns   91.88M
// shmtx_rd_pri(2thr_2lock_10pct_write)              92.35%    10.84ns   92.22M
// folly_rwspin(4thr_4lock_10pct_write)                         4.81ns  207.87M
// shmtx_wr_pri(4thr_4lock_10pct_write)              89.32%     5.39ns  185.67M
// shmtx_rd_pri(4thr_4lock_10pct_write)              88.96%     5.41ns  184.93M
// folly_rwspin(8thr_8lock_10pct_write)                         2.39ns  417.62M
// shmtx_wr_pri(8thr_8lock_10pct_write)              91.17%     2.63ns  380.76M
// shmtx_rd_pri(8thr_8lock_10pct_write)              89.53%     2.67ns  373.92M
// folly_rwspin(16thr_16lock_10pct_write)                       1.16ns  860.47M
// shmtx_wr_pri(16thr_16lock_10pct_write)            74.35%     1.56ns  639.77M
// shmtx_rd_pri(16thr_16lock_10pct_write)            91.34%     1.27ns  785.97M
// folly_rwspin(32thr_32lock_10pct_write)                       1.15ns  866.23M
// shmtx_wr_pri(32thr_32lock_10pct_write)            92.32%     1.25ns  799.72M
// shmtx_rd_pri(32thr_32lock_10pct_write)            94.40%     1.22ns  817.71M
// folly_rwspin(64thr_64lock_10pct_write)                       1.41ns  710.54M
// shmtx_wr_pri(64thr_64lock_10pct_write)            94.14%     1.50ns  668.88M
// shmtx_rd_pri(64thr_64lock_10pct_write)            94.80%     1.48ns  673.56M
// ----------------------------------------------------------------------------
// folly_rwspin(2thr_2lock_1pct_write)                          9.58ns  104.36M
// shmtx_wr_pri(2thr_2lock_1pct_write)               92.00%    10.42ns   96.01M
// shmtx_rd_pri(2thr_2lock_1pct_write)               91.79%    10.44ns   95.79M
// folly_rwspin(4thr_4lock_1pct_write)                          4.71ns  212.30M
// shmtx_wr_pri(4thr_4lock_1pct_write)               90.37%     5.21ns  191.85M
// shmtx_rd_pri(4thr_4lock_1pct_write)               89.94%     5.24ns  190.95M
// folly_rwspin(8thr_8lock_1pct_write)                          2.33ns  429.91M
// shmtx_wr_pri(8thr_8lock_1pct_write)               90.67%     2.57ns  389.80M
// shmtx_rd_pri(8thr_8lock_1pct_write)               90.61%     2.57ns  389.55M
// folly_rwspin(16thr_16lock_1pct_write)                        1.10ns  905.23M
// shmtx_wr_pri(16thr_16lock_1pct_write)             91.96%     1.20ns  832.46M
// shmtx_rd_pri(16thr_16lock_1pct_write)             92.29%     1.20ns  835.42M
// folly_rwspin(32thr_32lock_1pct_write)                        1.14ns  879.85M
// shmtx_wr_pri(32thr_32lock_1pct_write)             93.41%     1.22ns  821.86M
// shmtx_rd_pri(32thr_32lock_1pct_write)             94.18%     1.21ns  828.66M
// folly_rwspin(64thr_64lock_1pct_write)                        1.34ns  748.83M
// shmtx_wr_pri(64thr_64lock_1pct_write)             94.39%     1.41ns  706.84M
// shmtx_rd_pri(64thr_64lock_1pct_write)             94.02%     1.42ns  704.06M
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// folly_rwspin_ping_pong(burn0)                              605.63ns    1.65M
// shmtx_w_bare_ping_pong(burn0)                    102.17%   592.76ns    1.69M
// shmtx_r_bare_ping_pong(burn0)                     88.75%   682.44ns    1.47M
// folly_ticket_ping_pong(burn0)                     63.92%   947.56ns    1.06M
// boost_shared_ping_pong(burn0)                      8.52%     7.11us  140.73K
// pthrd_rwlock_ping_pong(burn0)                      7.88%     7.68us  130.15K
// ----------------------------------------------------------------------------
// folly_rwspin_ping_pong(burn100k)                           727.76ns    1.37M
// shmtx_w_bare_ping_pong(burn100k)                 100.79%   722.09ns    1.38M
// shmtx_r_bare_ping_pong(burn100k)                 101.98%   713.61ns    1.40M
// folly_ticket_ping_pong(burn100k)                 102.80%   707.95ns    1.41M
// boost_shared_ping_pong(burn100k)                  81.49%   893.02ns    1.12M
// pthrd_rwlock_ping_pong(burn100k)                  71.05%     1.02us  976.30K
// ----------------------------------------------------------------------------
// folly_rwspin_ping_pong(burn300k)                             2.11us  473.46K
// shmtx_w_bare_ping_pong(burn300k)                 100.06%     2.11us  473.72K
// shmtx_r_bare_ping_pong(burn300k)                  98.93%     2.13us  468.39K
// folly_ticket_ping_pong(burn300k)                  96.68%     2.18us  457.73K
// boost_shared_ping_pong(burn300k)                  84.72%     2.49us  401.13K
// pthrd_rwlock_ping_pong(burn300k)                  84.62%     2.50us  400.66K
// ----------------------------------------------------------------------------
// folly_rwspin_ping_pong(burn1M)                             709.70ns    1.41M
// shmtx_w_bare_ping_pong(burn1M)                   100.28%   707.73ns    1.41M
// shmtx_r_bare_ping_pong(burn1M)                    99.63%   712.37ns    1.40M
// folly_ticket_ping_pong(burn1M)                   100.09%   709.05ns    1.41M
// boost_shared_ping_pong(burn1M)                    94.09%   754.29ns    1.33M
// pthrd_rwlock_ping_pong(burn1M)                    96.32%   736.82ns    1.36M
// ============================================================================

int main(int argc, char** argv) {
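  // Reference each benchmark function so the compiler does not warn about
  // it being unused when the benchmarks are not run.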
  (void)folly_rwspin_reads;
  (void)shmtx_wr_pri_reads;
  (void)shmtx_w_bare_reads;
  (void)shmtx_rd_pri_reads;
  (void)shmtx_r_bare_reads;
  (void)folly_ticket_reads;
  (void)boost_shared_reads;
  (void)pthrd_rwlock_reads;
  (void)timed_wr_pri_reads;
  (void)timed_rd_pri_reads;
  (void)folly_rwspin;
  (void)shmtx_wr_pri;
  (void)shmtx_w_bare;
  (void)shmtx_rd_pri;
  (void)shmtx_r_bare;
  (void)folly_ticket;
  (void)boost_shared;
  (void)pthrd_rwlock;
  (void)pthrd_mutex_;
  (void)timed_wr_pri;
  (void)timed_rd_pri;
  (void)folly_rwspin_ping_pong;
  (void)shmtx_w_bare_ping_pong;
  (void)shmtx_r_bare_ping_pong;
  (void)folly_ticket_ping_pong;
  (void)boost_shared_ping_pong;
  (void)pthrd_rwlock_ping_pong;
  (void)timed_wr_pri_ping_pong;
  (void)timed_rd_pri_ping_pong;

  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}