llvm/openmp/runtime/src/z_Linux_util.cpp

/*
 * z_Linux_util.cpp -- platform specific routines.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
#include <alloca.h>
#endif
#include <math.h> // HUGE_VAL.
#if KMP_OS_LINUX
#include <semaphore.h>
#endif // KMP_OS_LINUX
#include <sys/resource.h>
#if KMP_OS_AIX
#include <sys/ldr.h>
#include <libperfstat.h>
#else
#include <sys/syscall.h>
#endif
#include <sys/time.h>
#include <sys/times.h>
#include <unistd.h>

#if KMP_OS_LINUX
#include <sys/sysinfo.h>
#if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require that you include
// (or break when you try to include) <pci/types.h>. Since all we need are the
// two macros below (which are part of the kernel ABI, so can't change), we
// just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif
#elif KMP_OS_DARWIN
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <pthread_np.h>
#if KMP_OS_DRAGONFLY
#include <kvm.h>
#endif
#elif KMP_OS_NETBSD || KMP_OS_OPENBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#if KMP_OS_NETBSD
#include <sched.h>
#endif
#if KMP_OS_OPENBSD
#include <pthread_np.h>
#endif
#elif KMP_OS_SOLARIS
#include <libproc.h>
#include <procfs.h>
#include <thread.h>
#include <sys/loadavg.h>
#endif

#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>

struct kmp_sys_timer {
  struct timespec start;
};

#ifndef TIMEVAL_TO_TIMESPEC
// Convert timeval to timespec.
#define TIMEVAL_TO_TIMESPEC(tv, ts)                                            \
  do {                                                                         \
    (ts)->tv_sec = (tv)->tv_sec;                                               \
    (ts)->tv_nsec = (tv)->tv_usec * 1000;                                      \
  } while (0)
#endif

// Convert timespec to nanoseconds.
#define TS2NS(timespec)                                                        \
  (((timespec).tv_sec * (long int)1e9) + (timespec).tv_nsec)
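
// Illustrative sketch of how the two helpers above compose (hedged example,
// deliberately compiled out; the function name is hypothetical): convert a
// gettimeofday() result to a timespec, then fold it into nanoseconds.
#if 0
static kmp_uint64 __kmp_example_now_ns(void) {
  struct timeval tv;
  struct timespec ts;
  gettimeofday(&tv, NULL); // microsecond-resolution wall clock
  TIMEVAL_TO_TIMESPEC(&tv, &ts); // tv_usec * 1000 -> tv_nsec
  return (kmp_uint64)TS2NS(ts); // seconds and nanoseconds folded together
}
#endif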

static struct kmp_sys_timer __kmp_sys_timer_data;

#if KMP_HANDLE_SIGNALS
typedef void (*sig_func_t)(int signo);
STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
static sigset_t __kmp_sigset;
#endif

static int __kmp_init_runtime = FALSE;

static int __kmp_fork_count = 0;

static pthread_condattr_t __kmp_suspend_cond_attr;
static pthread_mutexattr_t __kmp_suspend_mutex_attr;

static kmp_cond_align_t __kmp_wait_cv;
static kmp_mutex_align_t __kmp_wait_mx;

kmp_uint64 __kmp_ticks_per_msec = 1000000;
kmp_uint64 __kmp_ticks_per_usec = 1000;

#ifdef DEBUG_SUSPEND
static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
  KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
               cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
               cond->c_cond.__c_waiting);
}
#endif

#if ((KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||   \
      KMP_OS_AIX) &&                                                           \
     KMP_AFFINITY_SUPPORTED)

/* Affinity support */

void __kmp_affinity_bind_thread(int which) {}

#if KMP_OS_AIX
void __kmp_affinity_determine_capable(const char *env_var) {
  // All versions of AIX support bindprocessor().

  size_t mask_size = __kmp_xproc / CHAR_BIT;
  // Round up to byte boundary.
  if (__kmp_xproc % CHAR_BIT)
    ++mask_size;

  // Round up to the mask_size_type boundary.
  if (mask_size % sizeof(__kmp_affin_mask_size))
    mask_size += sizeof(__kmp_affin_mask_size) -
                 mask_size % sizeof(__kmp_affin_mask_size);
  KMP_AFFINITY_ENABLE(mask_size);
  KA_TRACE(10,
           ("__kmp_affinity_determine_capable: "
            "AIX OS affinity interface bindprocessor functional (mask size = "
            "%" KMP_SIZE_T_SPEC ").\n",
            __kmp_affin_mask_size));
}

#else // !KMP_OS_AIX

/* Determine if we can access affinity functionality on this version of
 * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
 * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
void __kmp_affinity_determine_capable(const char *env_var) {}
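
// A minimal sketch of the probing idea described above (hedged, illustrative
// only, compiled out): call the raw sched_getaffinity syscall with a trial
// mask size. A positive return value is a mask size the kernel accepts; -1
// with EINVAL means the buffer was too small and a larger size must be
// tried. The helper name is hypothetical.
#if 0
static long __kmp_example_probe_mask_size(size_t trial_size) {
  unsigned char *buf = (unsigned char *)KMP_INTERNAL_MALLOC(trial_size);
  long gCode = syscall(__NR_sched_getaffinity, 0, trial_size, buf);
  KMP_INTERNAL_FREE(buf);
  return gCode; // > 0: mask size accepted by the kernel; -1: errno tells why
}
#endif
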
#endif // KMP_OS_AIX
#endif // (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
// KMP_OS_DRAGONFLY || KMP_OS_AIX) && KMP_AFFINITY_SUPPORTED

#if KMP_USE_FUTEX

int __kmp_futex_determine_capable() {}
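
// Hedged sketch (illustrative only, compiled out) of the futex usage that the
// FUTEX_WAIT/FUTEX_WAKE constants enable: a waiter blocks only while *addr
// still equals `expected`, and a waker releases up to `count` waiters. The
// helper names are hypothetical.
#if 0
static void __kmp_example_futex_wait(volatile kmp_int32 *addr,
                                     kmp_int32 expected) {
  // Returns immediately (EAGAIN) if *addr != expected; otherwise sleeps
  // until another thread performs a FUTEX_WAKE on the same address.
  syscall(__NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0);
}
static void __kmp_example_futex_wake(volatile kmp_int32 *addr, int count) {
  syscall(__NR_futex, addr, FUTEX_WAKE, count, NULL, NULL, 0);
}
#endif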

#endif // KMP_USE_FUTEX

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_WASM) && (!KMP_ASM_INTRINS)
/* Only a 32-bit "add-exchange" instruction is available on the IA-32
   architecture, which forces us to use compare_and_store for these
   routines. */

kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value & d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value | d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
  kmp_uint32 old_value, new_value;

  old_value = TCR_4(*p);
  new_value = old_value & d;

  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_4(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#if KMP_ARCH_X86 || KMP_ARCH_WASM
kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
  kmp_int8 old_value, new_value;

  old_value = TCR_1(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_1(*p);
    new_value = old_value + d;
  }
  return old_value;
}

kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
  kmp_int64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value + d;

  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value + d;
  }
  return old_value;
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_WASM */

kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value | d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value | d;
  }
  return old_value;
}

kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
  kmp_uint64 old_value, new_value;

  old_value = TCR_8(*p);
  new_value = old_value & d;
  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
    KMP_CPU_PAUSE();
    old_value = TCR_8(*p);
    new_value = old_value & d;
  }
  return old_value;
}

#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_WASM) &&
          (! KMP_ASM_INTRINS) */
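
// For reference, the routines above all use the classic compare-and-swap
// retry loop. A minimal C++11 sketch of the same technique (hedged,
// illustrative only, compiled out; this runtime does not use std::atomic for
// these primitives):
#if 0
#include <atomic>
static int example_fetch_or(std::atomic<int> &v, int d) {
  int old_value = v.load(std::memory_order_relaxed);
  // On failure, compare_exchange_weak reloads old_value with the freshly
  // observed value, so each retry recomputes the desired result.
  while (!v.compare_exchange_weak(old_value, old_value | d))
    ;
  return old_value;
}
#endif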

void __kmp_terminate_thread(int gtid) {} // __kmp_terminate_thread

/* Set thread stack info.
   If the values are unreasonable, assume the call failed and use the
   incremental stack refinement method instead. Returns TRUE if the stack
   parameters could be determined exactly, FALSE if incremental refinement is
   necessary. */
static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {}
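
// On Linux, the exact stack geometry can typically be queried as sketched
// below (hedged, illustrative only, compiled out). pthread_getattr_np() is a
// GNU extension, which is one reason a fallback refinement path exists. The
// helper name is hypothetical.
#if 0
static int example_query_stack(void **stack_base, size_t *stack_size) {
  pthread_attr_t attr;
  void *addr = NULL;
  size_t size = 0;
  if (pthread_getattr_np(pthread_self(), &attr) != 0)
    return FALSE;
  pthread_attr_getstack(&attr, &addr, &size); // addr is the LOWEST address
  pthread_attr_destroy(&attr);
  *stack_base = (char *)addr + size; // report the top of the stack as base
  *stack_size = size;
  return TRUE;
}
#endif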

static void *__kmp_launch_worker(void *thr) {}

#if KMP_USE_MONITOR
/* The monitor thread controls all of the threads in the complex */

static void *__kmp_launch_monitor(void *thr) {
  int status, old_type, old_state;
#ifdef KMP_BLOCK_SIGNALS
  sigset_t new_set;
#endif /* KMP_BLOCK_SIGNALS */
  struct timespec interval;

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));

  /* register us as the monitor thread */
  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_MONITOR;
#endif

  KMP_MB();

#if USE_ITT_BUILD
  // Instruct Intel(R) Threading Tools to ignore monitor thread.
  __kmp_itt_thread_ignore();
#endif /* USE_ITT_BUILD */

  __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
                       (kmp_info_t *)thr);

  __kmp_check_stack_overlap((kmp_info_t *)thr);

#ifdef KMP_CANCEL_THREADS
  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
#endif

#if KMP_REAL_TIME_FIX
  // This is a potential fix which allows applications with a real-time
  // scheduling policy to work. However, a decision about the fix has not been
  // made yet, so it is disabled by default.
  { // Was the program started with a real-time scheduling policy?
    int sched = sched_getscheduler(0);
    if (sched == SCHED_FIFO || sched == SCHED_RR) {
      // Yes, we are part of a real-time application. Try to increase the
      // priority of the monitor.
      struct sched_param param;
      int max_priority = sched_get_priority_max(sched);
      int rc;
      KMP_WARNING(RealTimeSchedNotSupported);
      sched_getparam(0, &param);
      if (param.sched_priority < max_priority) {
        param.sched_priority += 1;
        rc = sched_setscheduler(0, sched, &param);
        if (rc != 0) {
          int error = errno;
          kmp_msg_t err_code = KMP_ERR(error);
          __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
                    err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
          if (__kmp_generate_warnings == kmp_warnings_off) {
            __kmp_str_free(&err_code.str);
          }
        }
      } else {
        // We cannot abort here, because the number of CPUs may be enough for
        // all the threads, including the monitor thread, so the application
        // could potentially work...
        __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
                  KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
                  __kmp_msg_null);
      }
    }
    // AC: free the thread that waits for the monitor to start
    TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
  }
#endif // KMP_REAL_TIME_FIX

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  if (__kmp_monitor_wakeups == 1) {
    interval.tv_sec = 1;
    interval.tv_nsec = 0;
  } else {
    interval.tv_sec = 0;
    interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));

  while (!TCR_4(__kmp_global.g.g_done)) {
    struct timespec now;
    struct timeval tval;

    /*  This thread monitors the state of the system */

    KA_TRACE(15, ("__kmp_launch_monitor: update\n"));

    status = gettimeofday(&tval, NULL);
    KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
    TIMEVAL_TO_TIMESPEC(&tval, &now);

    now.tv_sec += interval.tv_sec;
    now.tv_nsec += interval.tv_nsec;

    if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
      now.tv_sec += 1;
      now.tv_nsec -= KMP_NSEC_PER_SEC;
    }

    status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
    // AC: the monitor should not fall asleep if g_done has been set
    if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
      status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
                                      &__kmp_wait_mx.m_mutex, &now);
      if (status != 0) {
        if (status != ETIMEDOUT && status != EINTR) {
          KMP_SYSFAIL("pthread_cond_timedwait", status);
        }
      }
    }
    status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);

    TCW_4(__kmp_global.g.g_time.dt.t_value,
          TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);

    KMP_MB(); /* Flush all pending memory write invalidates.  */
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));

#ifdef KMP_BLOCK_SIGNALS
  status = sigfillset(&new_set);
  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
  status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */

  KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));

  if (__kmp_global.g.g_abort != 0) {
    /* now we need to terminate the worker threads  */
    /* the value of t_abort is the signal we caught */

    int gtid;

    KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
                  __kmp_global.g.g_abort));

    /* terminate the OpenMP worker threads */
    /* TODO this is not valid for sibling threads!!
     * the uber master might not be 0 anymore.. */
    for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
      __kmp_terminate_thread(gtid);

    __kmp_cleanup();

    KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
                  __kmp_global.g.g_abort));

    if (__kmp_global.g.g_abort > 0)
      raise(__kmp_global.g.g_abort);
  }

  KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));

  return thr;
}
#endif // KMP_USE_MONITOR

void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {} // __kmp_create_worker

#if KMP_USE_MONITOR
void __kmp_create_monitor(kmp_info_t *th) {
  pthread_t handle;
  pthread_attr_t thread_attr;
  size_t size;
  int status;
  int auto_adj_size = FALSE;

  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // We don't need a monitor thread in the case of MAX_BLOCKTIME
    KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
                  "MAX blocktime\n"));
    th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
    th->th.th_info.ds.ds_gtid = 0;
    return;
  }
  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
#if KMP_REAL_TIME_FIX
  TCW_4(__kmp_global.g.g_time.dt.t_value,
        -1); // Will use it for synchronization a bit later.
#else
  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
    auto_adj_size = TRUE;
  }
  status = pthread_attr_init(&thread_attr);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
  }
  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
  if (status != 0) {
    __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
  }

#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  status = pthread_attr_getstacksize(&thread_attr, &size);
  KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
#else
  size = __kmp_sys_min_stksize;
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
#endif /* KMP_THREAD_ATTR */

  if (__kmp_monitor_stksize == 0) {
    __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
  }
  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
    __kmp_monitor_stksize = __kmp_sys_min_stksize;
  }

  KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes,"
                "requested stacksize = %lu bytes\n",
                size, __kmp_monitor_stksize));

retry:

/* Set stack size for this thread now. */
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
  KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
                __kmp_monitor_stksize));
  status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
  if (status != 0) {
    if (auto_adj_size) {
      __kmp_monitor_stksize *= 2;
      goto retry;
    }
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, // should this be fatal?  BB
              KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
              err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */

  status =
      pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);

  if (status != 0) {
#ifdef _POSIX_THREAD_ATTR_STACKSIZE
    if (status == EINVAL) {
      if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
        __kmp_monitor_stksize *= 2;
        goto retry;
      }
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
                  __kmp_msg_null);
    }
    if (status == ENOMEM) {
      __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
                  KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
                  __kmp_msg_null);
    }
#endif /* _POSIX_THREAD_ATTR_STACKSIZE */
    if (status == EAGAIN) {
      __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
                  KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
    }
    KMP_SYSFAIL("pthread_create", status);
  }

  th->th.th_info.ds.ds_thread = handle;

#if KMP_REAL_TIME_FIX
  // Wait until the monitor thread has really started and set its *priority*.
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
                   sizeof(__kmp_global.g.g_time.dt.t_value));
  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
               &__kmp_neq_4, NULL);
#endif // KMP_REAL_TIME_FIX

#ifdef KMP_THREAD_ATTR
  status = pthread_attr_destroy(&thread_attr);
  if (status != 0) {
    kmp_msg_t err_code = KMP_ERR(status);
    __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
              __kmp_msg_null);
    if (__kmp_generate_warnings == kmp_warnings_off) {
      __kmp_str_free(&err_code.str);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
                th->th.th_info.ds.ds_thread));

} // __kmp_create_monitor
#endif // KMP_USE_MONITOR

void __kmp_exit_thread(int exit_status) {} // __kmp_exit_thread

#if KMP_USE_MONITOR
void __kmp_resume_monitor();

extern "C" void __kmp_reap_monitor(kmp_info_t *th) {
  int status;
  void *exit_val;

  KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
  // If both tid and gtid are 0, it means the monitor did not ever start.
  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* First, check to see whether the monitor thread exists to wake it up. This
     is to avoid a performance problem when the monitor sleeps during a
     blocktime-sized interval. */

  status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
  if (status != ESRCH) {
    __kmp_resume_monitor(); // Wake up the monitor thread
  }
  KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
  if (exit_val != th) {
    __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
  }

  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;

  KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
                " %#.8lx\n",
                th->th.th_info.ds.ds_thread));

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}
#else
// Empty symbol to export (see exports_so.txt) when
// monitor thread feature is disabled
extern "C" void __kmp_reap_monitor(kmp_info_t *th) {}
#endif // KMP_USE_MONITOR

void __kmp_reap_worker(kmp_info_t *th) {}

#if KMP_HANDLE_SIGNALS

static void __kmp_null_handler(int signo) {} // __kmp_null_handler

static void __kmp_team_handler(int signo) {} // __kmp_team_handler

static void __kmp_sigaction(int signum, const struct sigaction *act,
                            struct sigaction *oldact) {}

static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
                                      int parallel_init) {} // __kmp_install_one_handler

static void __kmp_remove_one_handler(int sig) {} // __kmp_remove_one_handler

void __kmp_install_signals(int parallel_init) {} // __kmp_install_signals

void __kmp_remove_signals(void) {} // __kmp_remove_signals

#endif // KMP_HANDLE_SIGNALS

void __kmp_enable(int new_state) {}

void __kmp_disable(int *old_state) {}

static void __kmp_atfork_prepare(void) {}

static void __kmp_atfork_parent(void) {}

/* Reset the library so execution in the child starts "all over again" with
   clean data structures in initial states.  Don't worry about freeing memory
   allocated by parent, just abandon it to be safe. */
static void __kmp_atfork_child(void) {}

void __kmp_register_atfork(void) {}

void __kmp_suspend_initialize(void) {}

void __kmp_suspend_initialize_thread(kmp_info_t *th) {}

void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {}

// return true if lock obtained, false otherwise
int __kmp_try_suspend_mx(kmp_info_t *th) {}

void __kmp_lock_suspend_mx(kmp_info_t *th) {}

void __kmp_unlock_suspend_mx(kmp_info_t *th) {}

/* This routine puts the calling thread to sleep after setting the
   sleep bit for the indicated flag variable to true. */
template <class C>
static inline void __kmp_suspend_template(int th_gtid, C *flag) {}
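
// A minimal sketch of the suspend protocol (hedged, illustrative only,
// compiled out): take the thread's suspend mutex, publish the sleep bit,
// then wait on the condition variable in a loop to tolerate spurious
// wakeups. The helper is hypothetical and elides the flag machinery.
#if 0
static void example_suspend(pthread_mutex_t *mx, pthread_cond_t *cv,
                            volatile int *sleeping) {
  pthread_mutex_lock(mx);
  *sleeping = TRUE; // the resume side clears this before signaling
  while (*sleeping)
    pthread_cond_wait(cv, mx); // drops mx while blocked, reacquires after
  pthread_mutex_unlock(mx);
}
#endif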

template <bool C, bool S>
void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {}
template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {}
template <bool C, bool S>
void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {}

template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
template void
__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
template void
__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);

/* This routine signals the thread specified by target_gtid to wake up
   after setting the sleep bit indicated by the flag argument to FALSE.
   The target thread must already have called __kmp_suspend_template() */
template <class C>
static inline void __kmp_resume_template(int target_gtid, C *flag) {}

template <bool C, bool S>
void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {}
template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {}
template <bool C, bool S>
void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {}

template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
template void
__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
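
// Counterpart sketch for the resume path (hedged, illustrative only,
// compiled out): clear the sleep bit under the same mutex before signaling,
// matching the predicate loop in the suspend sketch above.
#if 0
static void example_resume(pthread_mutex_t *mx, pthread_cond_t *cv,
                           volatile int *sleeping) {
  pthread_mutex_lock(mx);
  *sleeping = FALSE;
  pthread_cond_signal(cv); // wakes one waiter blocked in pthread_cond_wait
  pthread_mutex_unlock(mx);
}
#endif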

#if KMP_USE_MONITOR
void __kmp_resume_monitor() {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
  int status;
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
  KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
                KMP_GTID_MONITOR));
  KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
#endif
  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
#ifdef DEBUG_SUSPEND
  {
    char buffer[128];
    __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
    __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
                 KMP_GTID_MONITOR, buffer);
  }
#endif
  status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
  KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
                " for T#%d\n",
                gtid, KMP_GTID_MONITOR));
}
#endif // KMP_USE_MONITOR

void __kmp_yield() {}

void __kmp_gtid_set_specific(int gtid) {}

int __kmp_gtid_get_specific() {}

double __kmp_read_cpu_time(void) {}

int __kmp_read_system_info(struct kmp_sys_info *info) {}

void __kmp_read_system_time(double *delta) {}

void __kmp_clear_system_time(void) {}

static int __kmp_get_xproc(void) {} // __kmp_get_xproc

int __kmp_read_from_file(char const *path, char const *format, ...) {}

void __kmp_runtime_initialize(void) {}

void __kmp_runtime_destroy(void) {}

/* Put the thread to sleep for a time period */
/* NOTE: not currently used anywhere */
void __kmp_thread_sleep(int millis) {}

/* Calculate the elapsed wall clock time for the user */
void __kmp_elapsed(double *t) {}

/* Calculate the elapsed wall clock tick for the user */
void __kmp_elapsed_tick(double *t) {}

/* Return the current time stamp in nsec */
kmp_uint64 __kmp_now_nsec() {}
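
// A portable sketch of such a timestamp (hedged, illustrative only, compiled
// out), assuming a POSIX monotonic clock and the TS2NS helper defined above:
#if 0
static kmp_uint64 example_now_nsec(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts); // unaffected by wall-clock changes
  return (kmp_uint64)TS2NS(ts);
}
#endif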

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Measure clock ticks per millisecond */
void __kmp_initialize_system_tick() {}
#endif

/* Determine whether the given address is mapped into the current address
   space. */

int __kmp_is_address_mapped(void *addr) {} // __kmp_is_address_mapped

#ifdef USE_LOAD_BALANCE

#if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||    \
    KMP_OS_OPENBSD || KMP_OS_SOLARIS

// The function returns the rounded value of the system load average during
// the given time interval, which depends on the value of the
// __kmp_load_balance_interval variable (default is 60 sec; other values may
// be 300 sec or 900 sec).
// It returns -1 in case of error.
int __kmp_get_load_balance(int max) {
  double averages[3];
  int ret_avg = 0;

  int res = getloadavg(averages, 3);

  // Check __kmp_load_balance_interval to determine which of the averages to
  // use. getloadavg() may return fewer samples than requested, i.e. fewer
  // than 3.
  if (__kmp_load_balance_interval < 180 && (res >= 1)) {
    ret_avg = (int)averages[0]; // 1 min
  } else if ((__kmp_load_balance_interval >= 180 &&
              __kmp_load_balance_interval < 600) &&
             (res >= 2)) {
    ret_avg = (int)averages[1]; // 5 min
  } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
    ret_avg = (int)averages[2]; // 15 min
  } else { // Error occurred
    return -1;
  }

  return ret_avg;
}

#elif KMP_OS_AIX

// The function returns the number of running (not sleeping) threads, or -1 in
// case of error.
int __kmp_get_load_balance(int max) {

  static int glb_running_threads = 0; // Saved count of the running threads for
                                      // the thread balance algorithm.
  static double glb_call_time = 0; // Thread balance algorithm call time.
  int running_threads = 0; // Number of running threads in the system.

  double call_time = 0.0;

  __kmp_elapsed(&call_time);

  if (glb_call_time &&
      (call_time - glb_call_time < __kmp_load_balance_interval))
    return glb_running_threads;

  glb_call_time = call_time;

  if (max <= 0) {
    max = INT_MAX;
  }

  // Check how many perfstat_cpu_t structures are available.
  int logical_cpus = perfstat_cpu(NULL, NULL, sizeof(perfstat_cpu_t), 0);
  if (logical_cpus <= 0) {
    glb_call_time = -1;
    return -1;
  }

  perfstat_cpu_t *cpu_stat = (perfstat_cpu_t *)KMP_INTERNAL_MALLOC(
      logical_cpus * sizeof(perfstat_cpu_t));
  if (cpu_stat == NULL) {
    glb_call_time = -1;
    return -1;
  }

  // Set the name of the first logical CPU for which the info is desired.
  perfstat_id_t first_cpu_name;
  strcpy(first_cpu_name.name, FIRST_CPU);

  // Get the stat info of logical CPUs.
  int rc = perfstat_cpu(&first_cpu_name, cpu_stat, sizeof(perfstat_cpu_t),
                        logical_cpus);
  KMP_DEBUG_ASSERT(rc == logical_cpus);
  if (rc <= 0) {
    KMP_INTERNAL_FREE(cpu_stat);
    glb_call_time = -1;
    return -1;
  }
  for (int i = 0; i < logical_cpus; ++i) {
    running_threads += cpu_stat[i].runque;
    if (running_threads >= max)
      break;
  }

  // There _might_ be a timing hole where the thread executing this
  // code gets skipped in the load balance, and running_threads is 0.
  // Assert in the debug builds only!!!
  KMP_DEBUG_ASSERT(running_threads > 0);
  if (running_threads <= 0)
    running_threads = 1;

  KMP_INTERNAL_FREE(cpu_stat);

  glb_running_threads = running_threads;

  return running_threads;
}

#else // Linux* OS

// The function returns the number of running (not sleeping) threads, or -1 in
// case of error. An error can be reported if the Linux* OS kernel is too old
// (i.e. without "/proc" support). Counting of running threads stops once the
// maximum number of running threads has been encountered.
int __kmp_get_load_balance(int max) {} // __kmp_get_load_balance

#endif // KMP_OS_DARWIN

#endif // USE_LOAD_BALANCE

#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
      ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) ||                 \
      KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||            \
      KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF ||   \
      KMP_ARCH_AARCH64_32)

// Because WebAssembly will use `call_indirect` to invoke the microtask and
// WebAssembly indirect calls check that the called signature is a precise
// match, we need to cast each microtask function pointer back from `void *` to
// its original type.
typedef void (*microtask_t0)(int *, int *);
typedef void (*microtask_t1)(int *, int *, void *);
typedef void (*microtask_t2)(int *, int *, void *, void *);
typedef void (*microtask_t3)(int *, int *, void *, void *, void *);
typedef void (*microtask_t4)(int *, int *, void *, void *, void *, void *);
typedef void (*microtask_t5)(int *, int *, void *, void *, void *, void *,
                             void *);
typedef void (*microtask_t6)(int *, int *, void *, void *, void *, void *,
                             void *, void *);
typedef void (*microtask_t7)(int *, int *, void *, void *, void *, void *,
                             void *, void *, void *);
typedef void (*microtask_t8)(int *, int *, void *, void *, void *, void *,
                             void *, void *, void *, void *);
typedef void (*microtask_t9)(int *, int *, void *, void *, void *, void *,
                             void *, void *, void *, void *, void *);
typedef void (*microtask_t10)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *);
typedef void (*microtask_t11)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *,
                              void *);
typedef void (*microtask_t12)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *,
                              void *, void *);
typedef void (*microtask_t13)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *,
                              void *, void *, void *);
typedef void (*microtask_t14)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *,
                              void *, void *, void *, void *);
typedef void (*microtask_t15)(int *, int *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *, void *,
                              void *, void *, void *, void *, void *);

// We really only need the case with 1 argument, because Clang always builds
// a struct of pointers to the shared variables referenced in the outlined
// function.
int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
                           void *p_argv[]
#if OMPT_SUPPORT
                           ,
                           void **exit_frame_ptr
#endif
) {
#if OMPT_SUPPORT
  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
#endif

  switch (argc) {
  default:
    fprintf(stderr, "Too many args to microtask: %d!\n", argc);
    fflush(stderr);
    exit(-1);
  case 0:
    (*(microtask_t0)pkfn)(&gtid, &tid);
    break;
  case 1:
    (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]);
    break;
  case 2:
    (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
    break;
  case 3:
    (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
    break;
  case 4:
    (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3]);
    break;
  case 5:
    (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3], p_argv[4]);
    break;
  case 6:
    (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3], p_argv[4], p_argv[5]);
    break;
  case 7:
    (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3], p_argv[4], p_argv[5], p_argv[6]);
    break;
  case 8:
    (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                          p_argv[7]);
    break;
  case 9:
    (*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                          p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7],
                          p_argv[8]);
    break;
  case 10:
    (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9]);
    break;
  case 11:
    (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
    break;
  case 12:
    (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
                           p_argv[11]);
    break;
  case 13:
    (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
                           p_argv[11], p_argv[12]);
    break;
  case 14:
    (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
                           p_argv[11], p_argv[12], p_argv[13]);
    break;
  case 15:
    (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2],
                           p_argv[3], p_argv[4], p_argv[5], p_argv[6],
                           p_argv[7], p_argv[8], p_argv[9], p_argv[10],
                           p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
    break;
  }

  return 1;
}

#endif

#if KMP_OS_LINUX
// Functions for hidden helper task
namespace {
// Condition variable for initializing hidden helper team
pthread_cond_t hidden_helper_threads_initz_cond_var;
pthread_mutex_t hidden_helper_threads_initz_lock;
volatile int hidden_helper_initz_signaled = FALSE;

// Condition variable for deinitializing hidden helper team
pthread_cond_t hidden_helper_threads_deinitz_cond_var;
pthread_mutex_t hidden_helper_threads_deinitz_lock;
volatile int hidden_helper_deinitz_signaled = FALSE;

// Condition variable for the wrapper function of main thread
pthread_cond_t hidden_helper_main_thread_cond_var;
pthread_mutex_t hidden_helper_main_thread_lock;
volatile int hidden_helper_main_thread_signaled = FALSE;

// Semaphore for worker threads. We don't use a condition variable here
// because, when multiple signals are sent at the same time, only one thread
// might be woken up.
sem_t hidden_helper_task_sem;
} // namespace
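
// A minimal sketch (hedged, illustrative only, compiled out) of why a
// semaphore fits here: each sem_post() increments the count, so N signals
// sent back-to-back release exactly N waiting workers, whereas a condition
// variable signal that arrives while nobody waits is simply lost.
#if 0
static void example_worker_wait(void) {
  while (sem_wait(&hidden_helper_task_sem) != 0 && errno == EINTR)
    ; // retry if interrupted by a signal
}
static void example_task_signal(void) { sem_post(&hidden_helper_task_sem); }
#endif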

void __kmp_hidden_helper_worker_thread_wait() {}

void __kmp_do_initialize_hidden_helper_threads() {}

void __kmp_hidden_helper_threads_initz_wait() {}

void __kmp_hidden_helper_initz_release() {}

void __kmp_hidden_helper_main_thread_wait() {}

void __kmp_hidden_helper_main_thread_release() {}

void __kmp_hidden_helper_worker_thread_signal() {}

void __kmp_hidden_helper_threads_deinitz_wait() {}

void __kmp_hidden_helper_threads_deinitz_release() {}
#else // KMP_OS_LINUX
void __kmp_hidden_helper_worker_thread_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_do_initialize_hidden_helper_threads() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_threads_initz_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_initz_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_main_thread_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_main_thread_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_worker_thread_signal() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_threads_deinitz_wait() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}

void __kmp_hidden_helper_threads_deinitz_release() {
  KMP_ASSERT(0 && "Hidden helper task is not supported on this OS");
}
#endif // KMP_OS_LINUX

bool __kmp_detect_shm() {}

bool __kmp_detect_tmp() {}

// end of file //