/* * kmp_csupport.cpp -- kfront linkage support for OpenMP. */ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #define __KMP_IMP #include "omp.h" /* extern "C" declarations of user-visible routines */ #include "kmp.h" #include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_itt.h" #include "kmp_lock.h" #include "kmp_stats.h" #include "kmp_utils.h" #include "ompt-specific.h" #define MAX_MESSAGE … // flags will be used in the future, e.g. to implement openmp_strict library // restrictions /*! * @ingroup STARTUP_SHUTDOWN * @param[in] loc source location information * @param[in] flags for future use (currently ignored) * * Initialize the runtime library. This call is optional; if it is not made then * it will be implicitly called by attempts to use other library functions. */ void __kmpc_begin(ident_t *loc, kmp_int32 flags) { … } /*! * @ingroup STARTUP_SHUTDOWN * @param loc source location information * * Shutdown the runtime library. This is also optional, and even if called will * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to * zero. */ void __kmpc_end(ident_t *loc) { … } /*! @ingroup THREAD_STATES @param loc Source location information. @return The global thread index of the active thread. This function can be called in any context. If the runtime has only been entered at the outermost level from a single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is that which would be returned by omp_get_thread_num() in the outermost active parallel construct. (Or zero if there is no active parallel construct, since the primary thread is necessarily thread zero.) If multiple non-OpenMP threads all enter an OpenMP construct then this will be a unique thread identifier among all the threads created by the OpenMP runtime (but the value cannot be defined in terms of OpenMP thread ids returned by omp_get_thread_num()). */ kmp_int32 __kmpc_global_thread_num(ident_t *loc) { … } /*! @ingroup THREAD_STATES @param loc Source location information. @return The number of threads under control of the OpenMP<sup>*</sup> runtime. This function can be called in any context. It returns the total number of threads under the control of the OpenMP runtime. That is not a number that can be determined by any OpenMP standard calls, since the library may be called from more than one non-OpenMP thread, and this reflects the total over all such calls. Similarly, the runtime maintains underlying threads even when they are not active (since the cost of creating and destroying OS threads is high); this call counts all such threads even if they are not waiting for work. */ kmp_int32 __kmpc_global_num_threads(ident_t *loc) { … } /*! @ingroup THREAD_STATES @param loc Source location information. @return The thread number of the calling thread in the innermost active parallel construct. */ kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { … } /*! @ingroup THREAD_STATES @param loc Source location information. @return The number of threads in the innermost active parallel construct. */ kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { … } /*! * @ingroup DEPRECATED * @param loc location description * * This function need not be called. It always returns TRUE.
*/ kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { … } /*! @ingroup THREAD_STATES @param loc Source location information. @return 1 if this thread is executing inside an active parallel region, 0 if not. */ kmp_int32 __kmpc_in_parallel(ident_t *loc) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number @param num_threads number of threads requested for this parallel construct Set the number of threads to be used by the next fork spawned by this thread. This call is only required if the parallel construct has a `num_threads` clause. */ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) { … } void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads, int severity, const char *message) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number @param list_length number of entries in the num_threads_list array @param num_threads_list array of numbers of threads requested for this parallel construct and subsequent nested parallel constructs Set the number of threads to be used by the next fork spawned by this thread, and some nested forks as well. This call is only required if the parallel construct has a `num_threads` clause that has a list of integers as the argument. */ void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length, kmp_int32 *num_threads_list) { … } void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length, kmp_int32 *num_threads_list, int severity, const char *message) { … } void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { … } void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind) { … } /*! @ingroup PARALLEL @param loc source location information @param argc total number of arguments in the ellipsis @param microtask pointer to callback routine consisting of outlined parallel construct @param ... pointers to shared variables that aren't global Do the actual fork and call the microtask in the relevant number of threads. */ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { … } /*! @ingroup PARALLEL @param loc source location information @param argc total number of arguments for the microtask @param microtask pointer to callback routine consisting of outlined parallel construct @param cond condition for running in parallel @param args struct of pointers to shared variables that aren't global Perform a fork only if the condition is true. */ void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, kmp_int32 cond, void *args) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number @param num_teams number of teams requested for the teams construct @param num_threads number of threads per team requested for the teams construct Set the number of teams to be used by the teams construct. This call is only required if the teams construct has a `num_teams` clause or a `thread_limit` clause (or both). */ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads) { … } /*!
@ingroup PARALLEL @param loc source location information @param global_tid global thread number @param thread_limit limit on number of threads which can be created within the current task Set the thread_limit for the current task. This call is there to support the `thread_limit` clause on the `target` construct. */ void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number @param num_teams_lb lower bound on number of teams requested for the teams construct @param num_teams_ub upper bound on number of teams requested for the teams construct @param num_threads number of threads per team requested for the teams construct Set the number of teams to be used by the teams construct. The number of initial teams created will be greater than or equal to the lower bound and less than or equal to the upper bound. This call is only required if the teams construct has a `num_teams` clause or a `thread_limit` clause (or both). */ void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, kmp_int32 num_threads) { … } /*! @ingroup PARALLEL @param loc source location information @param argc total number of arguments in the ellipsis @param microtask pointer to callback routine consisting of outlined teams construct @param ... pointers to shared variables that aren't global Do the actual fork and call the microtask in the relevant number of threads. */ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { … } // I don't think this function should ever have been exported. // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated // OpenMP code ever called it, but it's been exported from the RTL for so // long that I'm afraid to remove the definition. int __kmpc_invoke_task_func(int gtid) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number Enter a serialized parallel construct. This interface is used to handle a conditional parallel region, like this, @code #pragma omp parallel if (condition) @endcode when the condition is false. */ void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup PARALLEL @param loc source location information @param global_tid global thread number Leave a serialized parallel construct. */ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information. Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though, depending on the memory ordering convention obeyed by the compiler, even that may not be necessary.) */ void __kmpc_flush(ident_t *loc) { … } /* -------------------------------------------------------------------------- */ /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid thread id. Execute a barrier. */ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { … } /* The BARRIER for a MASTER section is always explicit */ /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. */ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. Mark the end of a <tt>master</tt> region.
This should only be called by the thread that executes the <tt>master</tt> region. */ void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. @param filter result of evaluating filter clause on thread global_tid, or zero if no filter clause present @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. */ kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { … } /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. Mark the end of a <tt>masked</tt> region. This should only be called by the thread that executes the <tt>masked</tt> region. */ void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup WORK_SHARING @param loc source location information. @param gtid global thread number. Start execution of an <tt>ordered</tt> construct. */ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { … } /*! @ingroup WORK_SHARING @param loc source location information. @param gtid global thread number. End execution of an <tt>ordered</tt> construct. */ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { … } #if KMP_USE_DYNAMIC_LOCK static __forceinline void __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, kmp_int32 gtid, kmp_indirect_locktag_t tag) { … } // Fast-path acquire tas lock #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) … // Fast-path test tas lock #define KMP_TEST_TAS_LOCK(lock, gtid, rc) … // Fast-path release tas lock #define KMP_RELEASE_TAS_LOCK(lock, gtid) … #if KMP_USE_FUTEX #include <sys/syscall.h> #include <unistd.h> #ifndef FUTEX_WAIT #define FUTEX_WAIT … #endif #ifndef FUTEX_WAKE #define FUTEX_WAKE … #endif // Fast-path acquire futex lock #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) … // Fast-path test futex lock #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) … // Fast-path release futex lock #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) … #endif // KMP_USE_FUTEX #else // KMP_USE_DYNAMIC_LOCK static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, ident_t const *loc, kmp_int32 gtid) { kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; // Because of the double-check, the following load doesn't need to be volatile kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); if (lck == NULL) { void *idx; // Allocate & initialize the lock. // Remember alloc'ed locks in a table in order to free them in __kmp_cleanup() lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); __kmp_init_user_lock_with_checks(lck); __kmp_set_user_lock_location(lck, loc); #if USE_ITT_BUILD __kmp_itt_critical_creating(lck); // __kmp_itt_critical_creating() should be called *before* the first usage // of the underlying lock. It is the only place where we can guarantee it. There // is a chance the lock will be destroyed without ever being used, but that is // not a problem, because this is not a real event seen by the user but rather // a way of setting a name for the object (lock). See more details in kmp_itt.h. #endif /* USE_ITT_BUILD */ // Use a cmpxchg instruction to slam the start of the critical section with // the lock pointer. If another thread beat us to it, deallocate the lock, // and use the lock that the other thread allocated. int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); if (status == 0) { // Deallocate the lock and reload the value.
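// status == 0 means the compare-and-store failed: another thread already // published its lock pointer, so our newly allocated lock is redundant.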
#if USE_ITT_BUILD __kmp_itt_critical_destroyed(lck); // Let ITT know the lock is destroyed and the same memory location may be reused // for another purpose. #endif /* USE_ITT_BUILD */ __kmp_destroy_user_lock_with_checks(lck); __kmp_user_lock_free(&idx, gtid, lck); lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); KMP_DEBUG_ASSERT(lck != NULL); } } return lck; } #endif // KMP_USE_DYNAMIC_LOCK /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. @param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or some other suitably unique value. Enter code protected by a `critical` construct. This function blocks until the executing thread can enter the critical section. */ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { … } #if KMP_USE_DYNAMIC_LOCK // Converts the given hint to an internal lock implementation static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { … } #if OMPT_SUPPORT && OMPT_OPTIONAL #if KMP_USE_DYNAMIC_LOCK static kmp_mutex_impl_t __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { … } #else // For locks without dynamic binding static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { switch (__kmp_user_lock_kind) { case lk_tas: return kmp_mutex_impl_spin; #if KMP_USE_FUTEX case lk_futex: #endif case lk_ticket: case lk_queuing: case lk_drdpa: return kmp_mutex_impl_queuing; #if KMP_USE_TSX case lk_hle: case lk_rtm_queuing: case lk_rtm_spin: case lk_adaptive: return kmp_mutex_impl_speculative; #endif default: return kmp_mutex_impl_none; } } #endif // KMP_USE_DYNAMIC_LOCK #endif // OMPT_SUPPORT && OMPT_OPTIONAL /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. @param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or some other suitably unique value. @param hint the lock hint. Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation. This function blocks until the executing thread can enter the critical section unless the hint suggests use of speculative execution and the hardware supports it. */ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit, uint32_t hint) { … } // __kmpc_critical_with_hint #endif // KMP_USE_DYNAMIC_LOCK /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. @param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or some other suitably unique value. Leave a critical section, releasing any lock that was held during its execution. */ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid thread id. @return one if the thread should execute the master block, zero otherwise Start execution of a combined barrier and master. The barrier is executed inside this function. */ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid thread id. Complete the execution of a combined barrier and master. This function should only be called at the completion of the <tt>master</tt> code.
Other threads will still be waiting at the barrier and this call releases them. */ void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid thread id. @return one if the thread should execute the master block, zero otherwise Start execution of a combined barrier and master(nowait) construct. The barrier is executed inside this function. There is no equivalent "end" function, since the other threads do not wait for the master thread to finish: they continue execution as soon as the barrier completes, so there is nothing to release. */ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { … } /* The BARRIER for a SINGLE process section is always explicit */ /*! @ingroup WORK_SHARING @param loc source location information @param global_tid global thread number @return One if this thread should execute the single construct, zero otherwise. Test whether to execute a <tt>single</tt> construct. There are no implicit barriers in the two "single" calls; rather, the compiler should introduce an explicit barrier if one is required. */ kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup WORK_SHARING @param loc source location information @param global_tid global thread number Mark the end of a <tt>single</tt> construct. This function should only be called by the thread that executed the block of code protected by the `single` construct. */ void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { … } /*! @ingroup WORK_SHARING @param loc Source location @param global_tid Global thread id Mark the end of a statically scheduled loop. */ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { … } // User routines which take C-style arguments (call by value), // different from the Fortran equivalent routines void ompc_set_num_threads(int arg) { … } void ompc_set_dynamic(int flag) { … } void ompc_set_nested(int flag) { … } void ompc_set_max_active_levels(int max_active_levels) { … } void ompc_set_schedule(omp_sched_t kind, int modifier) { … } int ompc_get_ancestor_thread_num(int level) { … } int ompc_get_team_size(int level) { … } /* OpenMP 5.0 Affinity Format API */ void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { … } size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { … } void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { … } size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, char const *format) { … } void kmpc_set_stacksize(int arg) { … } void kmpc_set_stacksize_s(size_t arg) { … } void kmpc_set_blocktime(int arg) { … } void kmpc_set_library(int arg) { … } void kmpc_set_defaults(char const *str) { … } void kmpc_set_disp_num_buffers(int arg) { … } int kmpc_set_affinity_mask_proc(int proc, void **mask) { … } int kmpc_unset_affinity_mask_proc(int proc, void **mask) { … } int kmpc_get_affinity_mask_proc(int proc, void **mask) { … } /* -------------------------------------------------------------------------- */ /*! @ingroup THREADPRIVATE @param loc source location information @param gtid global thread number @param cpy_size size of the cpy_data buffer @param cpy_data pointer to data to be copied @param cpy_func helper function to call for copying data @param didit flag variable: 1=single thread; 0=not single thread __kmpc_copyprivate implements the interface for the private data broadcast needed for the copyprivate clause associated with a single region in an OpenMP<sup>*</sup> program (both C and Fortran). All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt> variable set to 1 and all other threads should have that variable set to 0. All threads pass a pointer to a data buffer (cpy_data) that they have built. The OpenMP specification forbids the use of nowait on the single region when a copyprivate clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid race conditions, so the code generation for the single region should avoid generating a barrier after the call to @ref __kmpc_copyprivate. The <tt>gtid</tt> parameter is the global thread id for the current thread. The <tt>loc</tt> parameter is a pointer to source location information. Internal implementation: The single thread will first copy its descriptor address (cpy_data) to a team-private location, then the other threads will each call the function pointed to by the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer. The cpy_func routine used for the copy and the contents of the data area defined by cpy_data and cpy_size may be built in any fashion that will allow the copy to be done. For instance, the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers to the data. The cpy_func routine must interpret the cpy_data buffer appropriately. The interface to cpy_func is as follows: @code void cpy_func( void *destination, void *source ) @endcode where void *destination is the cpy_data pointer for the thread being copied to and void *source is the cpy_data pointer for the thread being copied from. */ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void (*cpy_func)(void *, void *), kmp_int32 didit) { … } /* --------------------------------------------------------------------------*/ /*! @ingroup THREADPRIVATE @param loc source location information @param gtid global thread number @param cpy_data pointer to the data to be saved/copied or 0 @return the saved pointer to the data __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only saves the pointer it is given (if the pointer is non-zero, i.e. it comes from the single thread) and returns that saved pointer from every call (the single thread itself does not need it). This version does not do any actual data copying; the copying has to be done somewhere else, e.g. inline in the generated code. For the same reason this function, unlike __kmpc_copyprivate, has no barrier at the end, so the generated code needs a barrier after all of the data copying is done.
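As an illustration only (not mandated by the runtime interface), generated code might use this routine along the lines of the following hypothetical sketch, where <tt>didit</tt> and <tt>priv_data</tt> are made-up names:
@code
void *src = __kmpc_copyprivate_light(loc, gtid, didit ? priv_data : 0);
if (!didit) {
  // copy from the single thread's buffer (via src) into this
  // thread's private variables, inline in the generated code
}
__kmpc_barrier(loc, gtid); // generated code supplies the trailing barrier
@endcode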
*/ void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { … } /* -------------------------------------------------------------------------- */ #define INIT_LOCK … #define INIT_NESTED_LOCK … #define ACQUIRE_LOCK … #define ACQUIRE_LOCK_TIMED … #define ACQUIRE_NESTED_LOCK … #define ACQUIRE_NESTED_LOCK_TIMED … #define RELEASE_LOCK … #define RELEASE_NESTED_LOCK … #define TEST_LOCK … #define TEST_NESTED_LOCK … #define DESTROY_LOCK … #define DESTROY_NESTED_LOCK … // TODO: Make check abort messages use location info & pass it into // with_checks routines #if KMP_USE_DYNAMIC_LOCK // internal lock initializer static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) { … } // internal nest lock initializer static __forceinline void __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) { … } /* initialize the lock with a hint */ void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) { … } /* initialize the nested lock with a hint */ void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) { … } #endif // KMP_USE_DYNAMIC_LOCK /* initialize the lock */ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } // __kmpc_init_lock /* initialize the nested lock */ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } // __kmpc_init_nest_lock void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } // __kmpc_destroy_lock /* destroy the nested lock */ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } // __kmpc_destroy_nest_lock void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } /* release the nested lock */ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } /* try to acquire the lock */ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } /* try to acquire the nested lock */ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { … } // Interface to fast scalable reduce methods routines // keep the selected method in a thread-local structure for cross-function // usage: will be used in __kmpc_end_reduce* functions; // another solution: to re-determine the method one more time in // __kmpc_end_reduce* functions (new prototype required then) // AT: which solution is better? #define __KMP_SET_REDUCTION_METHOD … #define __KMP_GET_REDUCTION_METHOD … // description of the packed_reduction_method variable: look at the macros in // kmp.h // used in a critical section reduce block static __forceinline void __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { … } // used in a critical section reduce block static __forceinline void __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { … } // __kmp_end_critical_section_reduce_block static __forceinline int __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, int *task_state) { … } static __forceinline void __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { … } /* 2.a.i. Reduce Block without a terminating barrier */ /*!
@ingroup SYNCHRONIZATION @param loc source location information @param global_tid global thread number @param num_vars number of items (variables) to be reduced @param reduce_size size of data in bytes to be reduced @param reduce_data pointer to data to be reduced @param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data @param lck pointer to the unique lock data structure @result 1 for the primary thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed The nowait version is used for a reduce clause with the nowait argument. */ kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid global thread id. @param lck pointer to the unique lock data structure Finish the execution of a reduce nowait. */ void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) { … } /* 2.a.ii. Reduce Block with a terminating barrier */ /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid global thread number @param num_vars number of items (variables) to be reduced @param reduce_size size of data in bytes to be reduced @param reduce_data pointer to data to be reduced @param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data @param lck pointer to the unique lock data structure @result 1 for the primary thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed A blocking reduce that includes an implicit barrier. */ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck) { … } /*! @ingroup SYNCHRONIZATION @param loc source location information @param global_tid global thread id. @param lck pointer to the unique lock data structure Finish the execution of a blocking reduce. The <tt>lck</tt> pointer must be the same as that used in the corresponding start function. */ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck) { … } #undef __KMP_GET_REDUCTION_METHOD #undef __KMP_SET_REDUCTION_METHOD /* end of interface to fast scalable reduce routines */ kmp_uint64 __kmpc_get_taskid() { … } // __kmpc_get_taskid kmp_uint64 __kmpc_get_parent_taskid() { … } // __kmpc_get_parent_taskid /*! @ingroup WORK_SHARING @param loc source location information. @param gtid global thread number. @param num_dims number of associated doacross loops. @param dims info on loop bounds. Initialize doacross loop information. We expect the compiler to send us inclusive bounds, e.g. for (i = 2; i < 9; i += 2) we get lo=2, up=8, st=2.
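For illustration only, here is a hypothetical sketch of the call pattern a compiler might emit for a one-dimensional doacross loop (<tt>lb</tt> and <tt>ub</tt> stand for the bounds of the chunk assigned to this thread; the names are made up):
@code
struct kmp_dim dims = {2, 8, 2}; // lo = 2, up = 8 (inclusive), st = 2
__kmpc_doacross_init(loc, gtid, 1, &dims);
for (kmp_int64 i = lb; i <= ub; i += 2) {
  kmp_int64 vec = i - 2;
  __kmpc_doacross_wait(loc, gtid, &vec); // ordered depend(sink: i - 2)
  // ... loop body ...
  vec = i;
  __kmpc_doacross_post(loc, gtid, &vec); // ordered depend(source)
}
__kmpc_doacross_fini(loc, gtid);
@endcode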
*/ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, const struct kmp_dim *dims) { … } void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { … } void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { … } void __kmpc_doacross_fini(ident_t *loc, int gtid) { … } /* OpenMP 5.1 Memory Management routines */ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { … } void *omp_aligned_alloc(size_t align, size_t size, omp_allocator_handle_t allocator) { … } void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { … } void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, omp_allocator_handle_t allocator) { … } void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator) { … } void omp_free(void *ptr, omp_allocator_handle_t allocator) { … } /* end of OpenMP 5.1 Memory Management routines */ int __kmpc_get_target_offload(void) { … } int __kmpc_pause_resource(kmp_pause_status_t level) { … } void __kmpc_error(ident_t *loc, int severity, const char *message) { … } // Mark the beginning of a scope directive. void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { … } // Mark the end of a scope directive. void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { … } #ifdef KMP_USE_VERSION_SYMBOLS // For GOMP compatibility there are two versions of each omp_* API. // One is the plain C symbol and one is the Fortran symbol with an appended // underscore. When we implement a specific ompc_* version of an omp_* // function, we want the plain GOMP versioned symbol to alias the ompc_* version // instead of the Fortran versions in kmp_ftn_entry.h extern "C" { // Have to undef these from omp.h so they aren't translated into // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below #ifdef omp_set_affinity_format #undef omp_set_affinity_format #endif #ifdef omp_get_affinity_format #undef omp_get_affinity_format #endif #ifdef omp_display_affinity #undef omp_display_affinity #endif #ifdef omp_capture_affinity #undef omp_capture_affinity #endif KMP_VERSION_OMPC_SYMBOL(…); KMP_VERSION_OMPC_SYMBOL(…); KMP_VERSION_OMPC_SYMBOL(…); KMP_VERSION_OMPC_SYMBOL(…); } // extern "C" #endif