llvm/openmp/runtime/src/kmp_lock.cpp

/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
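// A minimal sketch of the constraint above (hypothetical names, not the real
// kmp_tas_lock_t layout): the fast path of a non-nested lock must keep all of
// its state in a single 32-bit word, because that is all the user's omp_lock_t
// is guaranteed to provide.
//
//   struct example_tas_lock {        // illustrative only
//     std::atomic<kmp_int32> poll;   // 0 == free, otherwise an owner tag
//   };
//   static_assert(sizeof(example_tas_lock) <= 4, "must fit in omp_lock_t");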

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) {}

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
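// A hedged sketch of the futex idea (illustrative; not the actual acquire and
// release bodies below): spin briefly on the lock word, then ask the kernel to
// sleep until the word changes, and wake one sleeper on release. The raw
// syscall form is shown because <futex.h> is deliberately not included (see
// the note at the top of this file).
//
//   // acquire (sketch; try_lock_word is a hypothetical helper):
//   while (!try_lock_word(&lck->lk.poll, gtid + 1))
//     syscall(__NR_futex, &lck->lk.poll, FUTEX_WAIT, observed_value, NULL,
//             NULL, 0);
//   // release (sketch):
//   unlock_word(&lck->lk.poll);
//   syscall(__NR_futex, &lck->lk.poll, FUTEX_WAKE, 1, NULL, NULL, 0);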

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) {}

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */
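// A hedged sketch of the bakery/ticket scheme these functions implement
// (illustrative pseudo-code, not the verbatim bodies): each acquirer takes a
// ticket with an atomic increment and spins until the "now serving" counter
// reaches its ticket; release advances "now serving", so threads are granted
// the lock strictly in arrival order.
//
//   my_ticket = fetch_add(&lck->lk.next_ticket, 1);   // take a number
//   while (!__kmp_bakery_check(&lck->lk.now_serving, my_ticket))
//     ;                                               // wait for our turn
//   /* ... critical section ... */
//   store(&lck->lk.now_serving, my_ticket + 1);       // serve the next ticket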

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */
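/* A hedged sketch of how the states above map onto the data (illustrative
   only; the real acquire/release paths below handle the empty and one-element
   transitions with atomic compare-exchange): head_id and tail_id hold gtid+1
   of the first and last waiter, each waiter spins on its own th_spin_here
   flag, and th_next_waiting links the waiters together.

     // a waiter appends itself and blocks (sketch):
     this_thr->th.th_spin_here = TRUE;
     prev = swap(&lck->lk.tail_id, gtid + 1);
     if (prev != 0)
       __kmp_threads[prev - 1]->th.th_next_waiting = gtid + 1;
     while (this_thr->th.th_spin_here)
       ; // cleared by the releasing thread once we reach the head
 */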

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK
#define TRACE_LOCK_T
#define TRACE_LOCK_HT

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {}

int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {}

int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {}

void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {}

void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {}

static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {}

// nested queuing locks

int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {}

int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {}

int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {}

static int
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {}

void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {}

void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {}

static void
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {}

static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
                                            const ident_t *loc) {}

static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {}

static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
                                         kmp_lock_flags_t flags) {}

#if KMP_USE_ADAPTIVE_LOCKS

/* RTM Adaptive locks */

#if KMP_HAVE_RTM_INTRINSICS
#include <immintrin.h>
#define SOFT_ABORT_MASK

#else

// Values from the status register after failed speculation.
#define _XBEGIN_STARTED
#define _XABORT_EXPLICIT
#define _XABORT_RETRY
#define _XABORT_CONFLICT
#define _XABORT_CAPACITY
#define _XABORT_DEBUG
#define _XABORT_NESTED
#define _XABORT_CODE

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK

#define STRINGIZE_INTERNAL
#define STRINGIZE

// Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
  an abort. This is the same definition as the compiler intrinsic that will be
  supported at some point. */
static __inline int _xbegin() {
  int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort.  Other register values are restored, so
     don't need to be killed.

     We must also mark 'res' as an input and an output, since otherwise
     'res=-1' may be dropped as being dead, whereas we do need the assignment on
     the successful (i.e., non-abort) path. */
  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
                   "   .long  1f-1b-6\n"
                   "    jmp   2f\n"
                   "1:  movl  %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}

/* Transaction end */
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}

/* This is a macro, the argument must be a single byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code. */
// clang-format off
#if KMP_OS_WINDOWS
#define _xabort
#else
#define _xabort
#endif
// clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
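// A hedged sketch of how the RTM primitives above are typically used by the
// speculative lock paths further down (illustrative; the real bodies also
// consult the adaptive "badness" state):
//
//   kmp_uint32 status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     if (lock_looks_free(lck)) {   // hypothetical check of the real lock
//       /* ... critical section runs transactionally ... */
//       _xend();                    // commit
//     } else {
//       _xabort(0xff);              // lock already held: abort explicitly
//     }
//   } else if (status & SOFT_ABORT_MASK) {
//     /* transient abort: speculation may be worth retrying */
//   } else {
//     /* hard abort: fall back to a non-speculative acquire */
//   }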

// Statistics are collected for testing purposes
#if KMP_DEBUG_ADAPTIVE_LOCKS

// We accumulate speculative lock statistics when the lock is destroyed. We
// keep locks that haven't been destroyed in the liveLocks list so that we can
// grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);

// Initialize the list of stats.
void __kmp_init_speculative_stats() {
  kmp_adaptive_lock_info_t *lck = &liveLocks;

  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
         sizeof(lck->stats));
  lck->stats.next = lck;
  lck->stats.prev = lck;

  KMP_ASSERT(lck->stats.next->stats.prev == lck);
  KMP_ASSERT(lck->stats.prev->stats.next == lck);

  __kmp_init_bootstrap_lock(&chain_lock);
}

// Insert the lock into the circular list
static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  lck->stats.next = liveLocks.stats.next;
  lck->stats.prev = &liveLocks;

  liveLocks.stats.next = lck;
  lck->stats.next->stats.prev = lck;

  KMP_ASSERT(lck->stats.next->stats.prev == lck);
  KMP_ASSERT(lck->stats.prev->stats.next == lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}

static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
  KMP_ASSERT(lck->stats.next->stats.prev == lck);
  KMP_ASSERT(lck->stats.prev->stats.next == lck);

  kmp_adaptive_lock_info_t *n = lck->stats.next;
  kmp_adaptive_lock_info_t *p = lck->stats.prev;

  n->stats.prev = p;
  p->stats.next = n;
}

static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
         sizeof(lck->stats));
  __kmp_remember_lock(lck);
}

static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
                            kmp_adaptive_lock_info_t *lck) {
  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;

  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
  t->successfulSpeculations += s->successfulSpeculations;
  t->hardFailedSpeculations += s->hardFailedSpeculations;
  t->softFailedSpeculations += s->softFailedSpeculations;
  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
  t->lemmingYields += s->lemmingYields;
}

static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  __kmp_add_stats(&destroyedStats, lck);
  __kmp_forget_lock(lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}

static float percent(kmp_uint32 count, kmp_uint32 total) {
  return (total == 0) ? 0.0 : (100.0 * count) / total;
}

void __kmp_print_speculative_stats() {
  kmp_adaptive_lock_statistics_t total = destroyedStats;
  kmp_adaptive_lock_info_t *lck;

  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
    __kmp_add_stats(&total, lck);
  }
  kmp_adaptive_lock_statistics_t *t = &total;
  kmp_uint32 totalSections =
      t->nonSpeculativeAcquires + t->successfulSpeculations;
  kmp_uint32 totalSpeculations = t->successfulSpeculations +
                                 t->hardFailedSpeculations +
                                 t->softFailedSpeculations;
  if (totalSections <= 0)
    return;

  kmp_safe_raii_file_t statsFile;
  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
    statsFile.set_stdout();
  } else {
    size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
    char buffer[buffLen];
    KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
                 (kmp_int32)getpid());
    statsFile.open(buffer, "w");
  }

  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
  fprintf(statsFile,
          " Lock parameters: \n"
          "   max_soft_retries               : %10d\n"
          "   max_badness                    : %10d\n",
          __kmp_adaptive_backoff_params.max_soft_retries,
          __kmp_adaptive_backoff_params.max_badness);
  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
          t->nonSpeculativeAcquireAttempts);
  fprintf(statsFile, " Total critical sections          : %10d\n",
          totalSections);
  fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
          t->successfulSpeculations,
          percent(t->successfulSpeculations, totalSections));
  fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
          t->nonSpeculativeAcquires,
          percent(t->nonSpeculativeAcquires, totalSections));
  fprintf(statsFile, " Lemming yields                   : %10d\n\n",
          t->lemmingYields);

  fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
          totalSpeculations);
  fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
          t->successfulSpeculations,
          percent(t->successfulSpeculations, totalSpeculations));
  fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
          t->softFailedSpeculations,
          percent(t->softFailedSpeculations, totalSpeculations));
  fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
          t->hardFailedSpeculations,
          percent(t->hardFailedSpeculations, totalSpeculations));
}

#define KMP_INC_STAT
#else
#define KMP_INC_STAT(lck, stat)

#endif // KMP_DEBUG_ADAPTIVE_LOCKS

static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {}

// Functions for manipulating the badness
static __inline void
__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {}

// Create a bit mask with one more set bit.
static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {}

// Check whether speculation should be attempted.
KMP_ATTRIBUTE_TARGET_RTM
static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
                                           kmp_int32 gtid) {}
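// A hedged sketch of the badness scheme the three functions above implement
// (illustrative, not the verbatim bodies): badness is a low-order bit mask. A
// successful speculation clears it, each failed one widens it by one bit
// (capped by max_badness), and speculation is attempted only when the
// acquire-attempt counter has all masked bits clear, i.e. progressively more
// rarely as failures accumulate.
//
//   on success : badness = 0;
//   on failure : badness = min((badness << 1) | 1, params->max_badness);
//   speculate? : return (lck->lk.adaptive.acquire_attempts & badness) == 0;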

// Attempt to acquire only the speculative lock.
// Does not back off to the non-speculative lock.
KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
                                         kmp_int32 gtid) {}

// Attempt to acquire the speculative lock, or back off to the non-speculative
// one if the speculative lock cannot be acquired.
// We can succeed speculatively, non-speculatively, or fail.
static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                kmp_int32 gtid) {}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively until
// the queue of waiting threads has cleared), we don't get into a state where we
// can never acquire the lock speculatively (because we force the queue to clear
// by preventing new arrivals from entering the queue). This does mean that when
// we're trying to break lemmings, the lock is no longer fair. However, OpenMP
// makes no guarantee that its locks are fair, so this isn't a real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {}

static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                   kmp_int32 gtid) {}

static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {}

static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {}

static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {}

#endif // KMP_USE_ADAPTIVE_LOCKS

/* ------------------------------------------------------------------------ */
/* DRDPA ticket locks                                                */
/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */

static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {}

static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {}

__forceinline static int
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                kmp_int32 gtid) {}

int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                             kmp_int32 gtid) {}

int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                kmp_int32 gtid) {}

void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {}

void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {}

static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {}

// nested drdpa ticket locks

int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                        kmp_int32 gtid) {}

int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                    kmp_int32 gtid) {}

int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                       kmp_int32 gtid) {}

void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {}

void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {}

static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {}

static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
                                          const ident_t *loc) {}

static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {}

static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
                                       kmp_lock_flags_t flags) {}

// Time stamp counter
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define __kmp_tsc()
// Runtime's default backoff parameters
kmp_backoff_t __kmp_spin_backoff_params =;
#else
// Use nanoseconds for other platforms
extern kmp_uint64 __kmp_now_nsec();
kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
#define __kmp_tsc
#endif

// A useful predicate for dealing with timestamps that may wrap.
// Is a before b? Since the timestamps may wrap, this is asking whether it's
// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
// Times where going clockwise is less distance than going anti-clockwise
// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
// then a > b (true) does not mean a reached b; whereas signed(a) = -2,
// signed(b) = 0 captures the actual difference
static inline bool before(kmp_uint64 a, kmp_uint64 b) {
  return ((kmp_int64)b - (kmp_int64)a) > 0;
}

// Truncated binary exponential backoff function
void __kmp_spin_backoff(kmp_backoff_t *boff) {}
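// A hedged, illustrative sketch of the truncated binary exponential backoff
// named above (assuming the kmp_backoff_t fields are, in order,
// {step, max_backoff, min_tick}, as the initializer {1, 256, 100} suggests):
// pause for 'step' slices of roughly min_tick timestamp ticks, then grow
// 'step' exponentially, truncating it at max_backoff so waits stay bounded.
//
//   for (kmp_uint32 i = boff->step; i > 0; i--) {
//     kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
//     do {
//       KMP_CPU_PAUSE();
//     } while (before(__kmp_tsc(), goal));
//   }
//   boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);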

#if KMP_USE_DYNAMIC_LOCK

// Direct lock initializer. It simply writes a tag to the low 8 bits of the
// lock word.
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
                                   kmp_dyna_lockseq_t seq) {}

#if KMP_USE_TSX

// HLE lock functions - imported from the testbed runtime.
#define HLE_ACQUIRE
#define HLE_RELEASE

static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {}

static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) {}

static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {}

static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {}

static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                               kmp_int32 gtid) {}

static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                              kmp_int32 gtid) {}

static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                           kmp_int32 gtid) {}

static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {}

static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {}

static void
__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {}

KMP_ATTRIBUTE_TARGET_RTM
static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                           kmp_int32 gtid) {}

static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                       kmp_int32 gtid) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {}

static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                       kmp_int32 gtid) {}

static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                   kmp_int32 gtid) {}

// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin
// lock.
typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;

static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {}

static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
                                       kmp_int32 gtid) {}

static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
                                       kmp_int32 gtid) {}

static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {}

static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                kmp_int32 gtid) {}

#endif // KMP_USE_TSX

// Entry functions for indirect locks (first element of direct lock jump tables)
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
                                     kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32);
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32);
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32);

// Lock function definitions for the union parameter type
#define KMP_FOREACH_LOCK_KIND(m, a)

#define expand1
#define expand2
#define expand3
#define expand4

KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()
KMP_FOREACH_LOCK_KIND()

#undef expand1
#undef expand2
#undef expand3
#undef expand4

// Jump tables for the direct lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
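// Hedged illustration of the indexing trick mentioned above: a direct lock
// keeps its sequence tag in the low bits of the lock word with bit 0 set, so
// the tag value is always odd and can be used to index these tables directly:
//
//   table[0]        = entry function for indirect locks (declared above)
//   table[odd tag]  = handler for the direct lock kind carrying that tag
//   table[even > 0] = unused padding
//
// which avoids a ">> 1" on every lock operation.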

// init functions
#define expand
void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) =;
#undef expand

// destroy functions
#define expand
static void (*direct_destroy[])(kmp_dyna_lock_t *) =;
#undef expand
#define expand
static void (*direct_destroy_check[])(kmp_dyna_lock_t *) =;
#undef expand

// set/acquire functions
#define expand
static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) =;
#undef expand
#define expand
static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) =;
#undef expand

// unset/release and test functions
#define expand
static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) =;
static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) =;
#undef expand
#define expand
static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) =;
static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) =;
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) =;
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) =;
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) =;
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) =;

// Jump tables for the indirect lock functions
#define expand
void (*__kmp_indirect_init[])(kmp_user_lock_p) =;
#undef expand

#define expand
static void (*indirect_destroy[])(kmp_user_lock_p) =;
#undef expand
#define expand
static void (*indirect_destroy_check[])(kmp_user_lock_p) =;
#undef expand

// set/acquire functions
#define expand
static int (*indirect_set[])(kmp_user_lock_p,
                             kmp_int32) =;
#undef expand
#define expand
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) =;
#undef expand

// unset/release and test functions
#define expand
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) =;
static int (*indirect_test[])(kmp_user_lock_p,
                              kmp_int32) =;
#undef expand
#define expand
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) =;
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) =;
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_indirect_destroy)(kmp_user_lock_p) =;
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) =;
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) =;
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) =;

// Lock index table.
kmp_indirect_lock_table_t __kmp_i_lock_table;

// Size of indirect locks.
static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] =;

// Jump tables for lock accessor/modifier.
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                     const ident_t *) =;
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                  kmp_lock_flags_t) =;
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) =;
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) =;

// Use different lock pools for different lock types.
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] =;

// User lock allocator for dynamically dispatched indirect locks. Every entry of
// the indirect lock table holds the address and type of the allocated indirect
// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
// full. A destroyed indirect lock object is returned to the reusable pool of
// locks, unique to each lock type.
kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                  kmp_int32 gtid,
                                                  kmp_indirect_locktag_t tag) {}
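// A hedged sketch of the allocation policy described above (illustrative; the
// real body also serializes against concurrent allocations and handles the
// OMP_LOCK_T_SIZE distinction used elsewhere in this file):
//
//   if (__kmp_indirect_lock_pool[tag] != NULL) {
//     lck = __kmp_indirect_lock_pool[tag];   // reuse a destroyed lock
//     __kmp_indirect_lock_pool[tag] = /* next lock in this type's pool */;
//   } else {
//     /* if the table is full, double its size; then take a fresh entry of
//        __kmp_i_lock_table */
//   }
//   lck->type = tag;
//   /* store either lck or its table index into *user_lock */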

// User lock lookup for dynamically dispatched locks.
static __forceinline kmp_indirect_lock_t *
__kmp_lookup_indirect_lock(void **user_lock, const char *func) {}

static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
                                     kmp_dyna_lockseq_t seq) {}

static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {}

static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {}

static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {}

static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {}

static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32 gtid) {}

static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32 gtid) {}

static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32 gtid) {}

kmp_dyna_lockseq_t __kmp_user_lock_seq =;

// This is used only in kmp_error.cpp when consistency checking is on.
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {}

// Initializes data for dynamic user locks.
void __kmp_init_dynamic_user_locks() {}

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {}

enum kmp_lock_kind __kmp_user_lock_kind =;
int __kmp_num_locks_in_block =; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first
    // element of the new table. All the tables will be organized into a list,
    // and can be freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}

// Get memory for a lock. It may be freshly allocated memory or memory reused
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}

// Return the lock's memory to the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL

  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized.  When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list.  This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop.  However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK