llvm/openmp/runtime/src/kmp_runtime.cpp

/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_utils.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if OMPD_SUPPORT
#include "ompd-specific.h"
#endif

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#ifndef KMP_USE_SHM
// Windows and WASI do not need these include files as they don't use shared
// memory.
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#define SHM_SIZE
#endif

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =;
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =;

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y)

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt =;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {}

/* Calculate the identifier of the current thread */
/* Fast (and somewhat portable) way to get a unique identifier for the
   executing thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {}

int __kmp_get_global_thread_id_reg() {}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {}

#define MAX_MESSAGE

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {}

void __kmp_warn(char const *format, ...) {}

void __kmp_abort_process() {} // __kmp_abort_process

void __kmp_abort_thread(void) {} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {}
#endif

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // According to Windows* documentation for DllMain entry point:
    // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
    //   lpReserved == NULL when FreeLibrary() is called,
    //   lpReserved != NULL when the process is terminated.
    // When FreeLibrary() is called, worker threads remain alive. So the
    // runtime's state is consistent and executing proper shutdown is OK.
    // When the process is terminated, worker threads have exited or been
    // forcefully terminated by the OS and only the shutdown thread remains.
    // This can leave the runtime in an inconsistent state.
    // Hence, only attempt proper cleanup when FreeLibrary() is called.
    // Otherwise, rely on OS to reclaim resources.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit   */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {}

void __kmp_exit_single(int gtid) {}

/* Determine if we can go parallel or must use a serialized parallel region,
 * and how many threads we can use.
 * set_nthreads is the number of threads requested for the team.
 * Returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use.
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {}

/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within the forkjoin critical section. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {}
#else
#define propagateFPControl
#define updateHWFPControl
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
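
// Compiled-out sketch of the check-before-write idea mentioned above: only
// store to the shared team field when the value actually changed, so repeated
// forks with unchanged FP control settings do not dirty the team's cache line.
// The function and parameter names here are hypothetical, not the runtime's.
#if 0
static inline void example_propagate_ctrl(unsigned new_ctrl,
                                          unsigned *team_ctrl) {
  if (*team_ctrl != new_ctrl) // skip the write (and the cache invalidation)
    *team_ctrl = new_ctrl;
}
#endif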

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so it runs only in a team of
   a single primary thread. */
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {}

// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {}

// Test if this fork is for the teams construct, i.e. to form the outer league
// of teams
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {}

// AC: This is the start of a parallel region that is nested inside a teams
// construct. The team is actual (hot); all workers are ready at the fork
// barrier. No lock is needed to initialize the team a bit, then release the
// workers.
static inline int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {}

// Create a serialized parallel region
static inline int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {}

#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {}
#endif

void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {}

/* Check whether we should push an internal control record onto the
   serial team stack.  If so, do it.  */
void __kmp_save_internal_controls(kmp_info_t *thread) {}

/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {}

/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {}

/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {}

// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {}
int __kmp_get_max_teams(void) {}
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {}
int __kmp_get_teams_thread_limit(void) {}

KMP_BUILD_ASSERT();
KMP_BUILD_ASSERT();

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {}

/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {}

int __kmp_get_ancestor_thread_num(int gtid, int level) {}

int __kmp_get_team_size(int gtid, int level) {}

kmp_r_sched_t __kmp_get_schedule_global() {}

/* Allocate (realloc == FALSE) or reallocate (realloc == TRUE)
   at least argc entries in the *t_argv array for the requested team. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {}

static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {}

static void __kmp_free_team_arrays(kmp_team_t *team) {}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {}

static kmp_internal_control_t __kmp_get_global_icvs(void) {}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {}

static void __kmp_initialize_root(kmp_root_t *root) {}

#ifdef KMP_DEBUG

typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}

static void __kmp_print_structure_team(char const *title, kmp_team_p const *team

) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list =
      (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:        %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:     ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:  ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:      %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:      ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:  %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:    %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool: ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  {
    // Walk the team list with a cursor so that 'list' keeps pointing at the
    // head; otherwise the free loop below would only release the terminator
    // item and leak the rest of the list.
    kmp_team_list_t cursor = list;
    while (cursor->next != NULL) {
      kmp_team_p const *team = cursor->entry;
      int i;
      __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
      __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
      __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
      __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
      __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
      __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
      for (i = 0; i < team->t.t_nproc; ++i) {
        __kmp_printf("    Thread %2d:      ", i);
        __kmp_print_structure_thread("", team->t.t_threads[i]);
      }
      __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
      __kmp_printf("\n");
      cursor = cursor->next;
    }
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}

#endif

//---------------------------------------------------------------------------
//  Stuff for per-thread fast random number generator
//  Table of primes
static const unsigned __kmp_primes[] =;

//---------------------------------------------------------------------------
//  __kmp_get_random: Get a random number using a linear congruential method.
unsigned short __kmp_get_random(kmp_info_t *thread) {}
//--------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
void __kmp_init_random(kmp_info_t *thread) {}
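
// Illustrative, compiled-out sketch of a linear congruential generator of the
// general form x_{n+1} = a * x_n + c (mod 2^32), as referenced above. The
// runtime presumably draws per-thread multipliers from the primes table so
// that threads get distinct streams; the constants and names below are
// hypothetical placeholders, not the runtime's actual values.
#if 0
static unsigned short example_lcg_next(unsigned *state, unsigned multiplier) {
  *state = (*state) * multiplier + 12345u;            // wraps modulo 2^32
  return (unsigned short)((*state >> 16) & 0x7fffu);  // use high-order bits
}
#endif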

#if KMP_OS_WINDOWS
/* Reclaim array entries for root threads that are already dead; returns the
 * number reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

/* This function attempts to create free entries in __kmp_threads and
   __kmp_root, and returns the number of free entries generated.

   For Windows* OS static library, the first mechanism used is to reclaim array
   entries for root threads that are already dead.

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and
   __kmp_root, with appropriate update to __kmp_threads_capacity. Array
   capacity is increased by doubling with clipping to __kmp_tp_capacity, if
   threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nNeed free slots, the function does
   that expansion. If not, nothing is done beyond the possible initial root
   thread reclamation.

   If any argument is negative, the behavior is undefined. */
static int __kmp_expand_threads(int nNeed) {}
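
// A minimal, standalone sketch (compiled out; not the runtime's
// implementation) of the growth policy described above: double the capacity
// until the requested number of free slots exists, clipping to a hard limit,
// and do nothing if even the clipped capacity cannot provide them. All names
// below are hypothetical.
#if 0
static int example_grow_capacity(int capacity, int used, int needed,
                                 int hard_limit) {
  int target = used + needed;
  if (target > hard_limit)
    return capacity; // expansion cannot yield the requested free slots
  int new_capacity = (capacity > 0) ? capacity : 1;
  while (new_capacity < target)
    new_capacity *= 2; // double
  if (new_capacity > hard_limit)
    new_capacity = hard_limit; // clip
  return new_capacity; // caller would reallocate the arrays to this size
}
#endif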

/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if we are
   the thread that calls this from __kmp_do_serial_initialize(). */
int __kmp_register_root(int initial_thread) {}

#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {}
#endif

// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {}

void __kmp_unregister_root_current_thread(int gtid) {}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
#endif

#if KMP_DEBUG
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG

/* TODO optimize with one big memclr, take out what isn't needed, split
   responsibility to workers as much as possible, and delay initialization of
   features as much as possible  */
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {}

/* Allocate a new thread for the requesting team. This is only called from
   within a forkjoin critical section. We first try to get an available thread
   from the thread pool; if none is available, we fork a new one, assuming we
   are able to create one. This should be assured, as the caller should have
   checked on this first. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {}

/* Reinitialize a team for reuse.
   The hot team code calls this routine at every fork barrier, so the EPCC
   barrier tests are extremely sensitive to changes in it, especially writes to
   the team struct, which cause a cache invalidation in all threads.
   IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {}

/* Initialize the team data structure.
   This assumes that t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {}

#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {}

// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {}

#endif // KMP_AFFINITY_SUPPORTED

/* allocate a new team data structure to use.  take one off of the free pool if
   available */
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {}

/* TODO implement hot-teams at all levels */
/* TODO implement lazy thread release on demand (disband request) */

/* free the team.  return it to the team pool.  release all the threads
 * associated with it */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {}

/* reap the team.  destroy it, reclaim all its resources and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {}

// Free the thread.  Don't reap it, just place it on the pool of available
// threads.
//
// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
// binding for the affinity mechanism to be useful.
//
// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
// However, we want to avoid a potential performance problem by always
// scanning through the list to find the correct point at which to insert
// the thread (potential N**2 behavior).  To do this we keep track of the
// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
// With single-level parallelism, threads will always be added to the tail
// of the list, kept track of by __kmp_thread_pool_insert_pt.  With nested
// parallelism, all bets are off and we may need to scan through the entire
// free list.
//
// This change also has a potentially large performance benefit, for some
// applications.  Previously, as threads were freed from the hot team, they
// would be placed back on the free list in inverse order.  If the hot team
// grew back to its original size, then the freed threads would be placed
// back on the hot team in reverse order.  This could cause bad cache
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
// Now, for single-level parallelism, the OMP tid is always == gtid.
void __kmp_free_thread(kmp_info_t *this_th) {}
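
// Illustrative, compiled-out sketch (not part of the runtime) of the
// sorted-insert idea described above: keep a singly-linked free list ordered
// by an integer key and remember the last insertion point, so the common case
// of appending at the tail stays cheap. All names here are hypothetical.
#if 0
struct example_node {
  int key; // stands in for the thread's gtid
  struct example_node *next;
};
static example_node *example_pool = nullptr;      // head, sorted ascending
static example_node *example_insert_pt = nullptr; // last insertion point

static void example_pool_insert(example_node *n) {
  // Start from the remembered insert point when its key precedes the new key
  // (everything before it must also precede it); otherwise rescan from head.
  example_node **link = &example_pool;
  if (example_insert_pt && example_insert_pt->key < n->key)
    link = &example_insert_pt->next;
  while (*link && (*link)->key < n->key)
    link = &(*link)->next;
  n->next = *link;
  *link = n;
  example_insert_pt = n; // single-level parallelism keeps appending here
}
#endif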

/* ------------------------------------------------------------------------ */

void *__kmp_launch_thread(kmp_info_t *this_thr) {}

/* ------------------------------------------------------------------------ */

void __kmp_internal_end_dest(void *specific_gtid) {}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {}

#endif

/* [Windows] josh: when the atexit handler is called, there may still be more
   than one thread alive */
void __kmp_internal_end_atexit(void) {}

static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {} // __kmp_reap_thread

static void __kmp_itthash_clean(kmp_info_t *th) {}

static void __kmp_internal_end(void) {}

void __kmp_internal_end_library(int gtid_req) {} // __kmp_internal_end_library

void __kmp_internal_end_thread(int gtid_req) {} // __kmp_internal_end_thread

// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag =;
// Random value used to indicate library initialization.
static char *__kmp_registration_str =;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {} // __kmp_reg_status_name

#if defined(KMP_USE_SHM)
bool __kmp_shm_available =;
bool __kmp_tmp_available =;
// If /dev/shm is not accessible, we will create a temporary file under /tmp.
char *temp_reg_status_file_name =;
#endif

void __kmp_register_library_startup(void) {} // func __kmp_register_library_startup

void __kmp_unregister_library(void) {} // __kmp_unregister_library

// End of Library registration stuff.
// -----------------------------------------------------------------------------

#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT
#endif
// getauxval() function is available in RHEL7 and SLES12. If a system with an
// earlier OS is used to build the RTL, we'll use the following internal
// function when the entry is not found.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
  // KMP_USER_LEVEL_MWAIT was set to TRUE.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */

static void __kmp_do_serial_initialize(void) {}

void __kmp_serial_initialize(void) {}

static void __kmp_do_middle_initialize(void) {}

void __kmp_middle_initialize(void) {}

void __kmp_parallel_initialize(void) {}

void __kmp_hidden_helper_initialize() {}

/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {}

int __kmp_invoke_task_func(int gtid) {}

void __kmp_teams_master(int gtid) {}

int __kmp_invoke_teams_master(int gtid) {}

/* This sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {}

void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
                                 int *num_threads_list) {}

void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
                                  const char *msg) {}

static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {}

/* this sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered  */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered  */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {}

/* Launch the worker threads into the microtask. */

void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */

/* ------------------------------------------------------------------------ */

/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {}

/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {}

int __kmp_ignore_mppend(void) {}

void __kmp_internal_begin(void) {}

/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {}

void __kmp_aux_set_stacksize(size_t arg) {}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {}

/* Get team information common to all team APIs */
// Returns NULL if not in a teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {}

int __kmp_aux_get_team_num() {}

int __kmp_aux_get_num_teams() {}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * . => right justified when size is specified
 * by default output is left justified
 * size is the *minimum* field length
 * All other characters are printed as is
 *
 * Available field types:
 * L {thread_level}      - omp_get_level()
 * n {thread_num}        - omp_get_thread_num()
 * h {host}              - name of host machine
 * P {process_id}        - process id (integer)
 * T {thread_identifier} - native thread identifier (integer)
 * N {num_threads}       - omp_get_num_threads()
 * A {ancestor_tnum}     - omp_get_ancestor_thread_num(omp_get_level()-1)
 * a {thread_affinity}   - comma separated list of integers or integer ranges
 *                         (values of affinity mask)
 *
 * Implementation-specific field types can be added
 * If a type is unknown, print "undefined"
 */
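
/* Illustrative example of the specification above (not part of the runtime):
   with the format "host=%h tid=%0.4n", a thread with thread_num 7 running on
   host "node01" would display "host=node01 tid=0007" -- '0' requests leading
   zeros, '.' requests right justification, and 4 is the minimum field width.
   A compiled-out, standalone sketch of that width/zero handling via snprintf
   (the function name and parameters are hypothetical): */
#if 0
#include <cstdio>
static void example_print_int_field(int value, int size, bool zeros,
                                    bool right_justified) {
  char buf[64];
  if (right_justified)
    std::snprintf(buf, sizeof(buf), zeros ? "%0*d" : "%*d", size, value);
  else
    std::snprintf(buf, sizeof(buf), "%-*d", size, value); // default: left
  std::printf("%s\n", buf); // e.g. value=7, size=4, zeros=true -> "0007"
}
#endif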

// Structure holding the short name, long name, and corresponding data type
// for snprintf.  A table of these will represent the entire valid keyword
// field types.
kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] =;

// Return the number of characters it takes to hold field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {}

/*
 * Return the number of characters needed to hold the affinity string
 * (not including the terminating null byte).
 * The resulting string is printed to buffer, which the caller can then
 * handle afterwards.
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {}

/* ------------------------------------------------------------------------ */
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {}

void __kmp_aux_set_defaults(char const *str, size_t len) {} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {}
// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() {}

// Hard pause shuts down the runtime completely.  Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// TODO: add warning messages
int __kmp_pause_resource(kmp_pause_status_t level) {}

void __kmp_omp_display_env(int verbose) {}

// The team size is changing, so distributed barrier must be modified
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {}

void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {}

// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num =;
kmp_int32 __kmp_enable_hidden_helper =;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {}

/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   Note: we skip duplicate topology levels, and skip levels with only
      one entity.
   KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
      in the topology, and initializes the number of threads at each of those
      levels to the number of entities at each level, respectively, below the
      entity at the parent level.
   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
      but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
      the user to turn nesting on explicitly. This is an even more experimental
      option to this experimental feature, and may change or go away in the
      future.
*/
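
/* For example (illustrative): on a machine whose topology exposes 2 sockets,
   8 cores per socket, and 2 hardware threads per core, KMP_NESTING_MODE=1
   would set up three nesting levels with 2, 8, and 2 threads respectively,
   conceptually similar to requesting OMP_NUM_THREADS=2,8,2 with nesting
   enabled. */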

// Allocate space to store nesting levels
void __kmp_init_nesting_mode() {}

// Set # threads for top levels of nesting; must be called after the topology
// is set
void __kmp_set_nesting_mode_threads() {}

// Empty symbols to export (see exports_so.txt) when feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info =;
int __kmp_debugging =;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
}

// end of file