//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef __OMPX_H
#define __OMPX_H
/// Default `width` of the C++ shfl_down_sync wrappers: the value of
/// __AMDGCN_WAVEFRONT_SIZE when the compiler defines that macro, 32 otherwise.
#if defined(__AMDGCN_WAVEFRONT_SIZE)
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#else
#define __WARP_SIZE 32
#endif
/// 64-bit unsigned integer used for the `mask` parameters and the ballot
/// result in this header. The compiler-provided exact-width type is preferred
/// because plain `unsigned long` is only 32 bits wide on LLP64 targets; the
/// original spelling is kept as a fallback for compilers that do not define
/// __UINT64_TYPE__.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif
#ifdef __cplusplus
extern "C" {
#endif
/// OpenMP runtime queries used by the host fallbacks of ompx_thread_id and
/// ompx_block_dim. The prototypes are spelled out here; this header does not
/// include any other header to obtain them.
int omp_get_team_size(int level);
int omp_get_ancestor_thread_num(int level);
#ifdef __cplusplus
}
#endif
/// Target kernel language extensions
///
/// These extensions exist for the host to allow fallback implementations;
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
/// the kernel language are followed, the host fallbacks should behave as
/// expected since the kernel is represented as three sequential outer loops,
/// one for each grid dimension, and three (nested) parallel loops, one for each
/// block dimension. This fallback is not supposed to be optimal and should be
/// configurable by the user.
///
///{
#ifdef __cplusplus
extern "C" {
#endif
/// Memory orderings passed as the `Ordering` argument of the ompx_sync_block
/// family. Each enumerator has the value of the matching __ATOMIC_* builtin
/// macro.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_acquire = __ATOMIC_ACQUIRE,
  /// Original, misspelled name of ompx_acquire; kept so existing code still
  /// builds.
  ompx_aquire = ompx_acquire,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};
/// Dimension selectors passed as the `Dim` argument of the
/// ompx_{thread_id,block_dim,block_id,grid_dim} queries. The enumerators count
/// up from zero: x is 0, y is 1 and z is 2.
enum {
  ompx_dim_x,
  ompx_dim_y,
  ompx_dim_z,
};
// TODO: The following implementation is for host fallback. We need to disable
// generation of host fallback in kernel language mode.
#pragma omp begin declare variant match(device = {kind(cpu)})
/// Host fallbacks for ompx_{thread_id,block_dim,block_id,grid_dim}
///{
/// Thread index for dimension `Dim`: the OpenMP ancestor thread number at
/// nesting level `Dim + 1`.
static inline int ompx_thread_id(int Dim) {
  return omp_get_ancestor_thread_num(Dim + 1);
}
/// Block extent for dimension `Dim`: the OpenMP team size at nesting level
/// `Dim + 1`.
static inline int ompx_block_dim(int Dim) {
  return omp_get_team_size(Dim + 1);
}
/// The host fallback always reports block index 0, whatever `Dim` is.
static inline int ompx_block_id(int Dim) {
  (void)Dim;
  return 0;
}
/// The host fallback always reports a grid extent of 1, whatever `Dim` is.
static inline int ompx_grid_dim(int Dim) {
  (void)Dim;
  return 1;
}
///}
/// Host fallbacks for ompx_sync_block{,_acq_rel,_divergent}
///{
// Defines `static inline RESULT ompx_<FN>(PARAMS)` whose body is the single
// statement STMT.
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RESULT, FN, PARAMS, STMT) \
  static inline RESULT ompx_##FN(PARAMS) { \
    STMT; \
  }
// The fallback issues an OpenMP barrier; `Ordering` is not used by it.
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
                                      _Pragma("omp barrier"))
// Same as ompx_sync_block with the ordering fixed to ompx_acq_rel.
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
                                      ompx_sync_block(ompx_acq_rel))
// On the host the divergent variant simply forwards to ompx_sync_block.
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
                                      ompx_sync_block(Ordering))
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
///}
/// Host fallback for ompx_ballot_sync. No host implementation exists: the
/// arguments are ignored and the call traps, so it never returns a value.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  (void)mask;
  (void)pred;
  __builtin_trap();
}
/// Host fallbacks for ompx_shfl_down_sync_{i,f,l,d}
///{
// Defines `ompx_shfl_down_sync_<SUFFIX>` for values of type VALUE_T. No host
// implementation exists: every generated function ignores its arguments and
// traps.
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(VALUE_T, SUFFIX) \
  static inline VALUE_T ompx_shfl_down_sync_##SUFFIX( \
      uint64_t mask, VALUE_T var, unsigned delta, int width) { \
    (void)mask; \
    (void)var; \
    (void)delta; \
    (void)width; \
    __builtin_trap(); \
  }
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}
#pragma omp end declare variant
/// ompx_sync_block{,_acq_rel,_divergent}
///{
/// Block synchronization; `Ordering` is one of the ompx_* memory orderings.
void ompx_sync_block(int Ordering);
/// Block synchronization without an ordering argument; the host fallback
/// forwards to ompx_sync_block with ompx_acq_rel.
void ompx_sync_block_acq_rel(void);
/// Variant of ompx_sync_block; the host fallback simply forwards `Ordering` to
/// ompx_sync_block.
/// NOTE(review): presumably intended for callers in divergent control flow --
/// confirm against the device runtime.
void ompx_sync_block_divergent(int Ordering);
///}
/// ompx_{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
///{
// For each query NAME this declares `int ompx_NAME(int Dim)` and defines the
// shorthands ompx_NAME_{x,y,z}, which call it with the matching ompx_dim_*
// selector. The shorthands take `(void)` so that they are real prototypes in
// C; an empty `()` leaves the parameters unspecified there and lets calls
// with stray arguments compile.
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \
  int ompx_##NAME(int Dim); \
  static inline int ompx_##NAME##_x(void) { \
    return ompx_##NAME(ompx_dim_x); \
  } \
  static inline int ompx_##NAME##_y(void) { \
    return ompx_##NAME(ompx_dim_y); \
  } \
  static inline int ompx_##NAME##_z(void) { \
    return ompx_##NAME(ompx_dim_z); \
  }
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
///}
/// Ballot primitive taking a 64-bit `mask` and a predicate `pred` and returning
/// a 64-bit result. The host variant of this function traps.
/// NOTE(review): presumably mirrors CUDA's __ballot_sync semantics -- confirm
/// against the device runtime.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
/// ompx_shfl_down_sync_{i,f,l,d}
///{
/// One shuffle-down entry point per value type; the suffix names the type of
/// `var` and of the result (i: int, f: float, l: long, d: double). The host
/// variants of these functions trap.
int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width);
float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
                            int width);
long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width);
double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
                             int width);
///}
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
/// C++ interface: thin `ompx::` wrappers that forward to the `ompx_*` C entry
/// points.
namespace ompx {

/// Dimension selectors, mirroring ompx_dim_{x,y,z}.
enum {
  dim_x = ompx_dim_x,
  dim_y = ompx_dim_y,
  dim_z = ompx_dim_z,
};

/// Memory orderings, mirroring the ompx_* C enumerators.
enum {
  relaxed = ompx_relaxed,
  // The C enumerator mirrored here is spelled `ompx_aquire`.
  acquire = ompx_aquire,
  release = ompx_release,
  acq_rel = ompx_acq_rel,
  seq_cst = ompx_seq_cst,
  // Original, misspelled names; kept so existing code still builds.
  aquire = acquire,
  acc_rel = acq_rel,
};

/// ompx::{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
///{
// NAME(Dim) forwards to ompx_NAME(Dim); NAME_{x,y,z}() pass the matching
// dimension selector.
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \
  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); } \
  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); } \
  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); } \
  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
///}

/// ompx::sync_block{,_divergent}
///{
// NAME(ARGS) forwards CALL_ARGS to ompx_NAME. `Ordering` defaults to acq_rel.
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \
  static inline RETTY NAME(ARGS) { \
    return ompx_##NAME(CALL_ARGS); \
  }
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block,
                                        int Ordering = acq_rel, Ordering)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
                                        int Ordering = acq_rel, Ordering)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}

/// Forwards to ompx_ballot_sync.
static inline uint64_t ballot_sync(uint64_t mask, int pred) {
  return ompx_ballot_sync(mask, pred);
}

/// shfl_down_sync
///{
// One overload per value type, each forwarding to ompx_shfl_down_sync_<TY>.
// `width` defaults to __WARP_SIZE.
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
                                    int width = __WARP_SIZE) { \
    return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
  }
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

} // namespace ompx
#endif
///}
#endif /* __OMPX_H */