llvm/openmp/runtime/src/include/ompx.h.var

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __OMPX_H
#define __OMPX_H

/// Default `width` of the C++ shuffle wrappers: the AMDGCN wavefront size when
/// the compiler defines it, 32 otherwise.
#ifndef __AMDGCN_WAVEFRONT_SIZE
#define __WARP_SIZE 32
#else
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#endif

/// 64-bit unsigned integer type used for the `mask` arguments and the ballot
/// result.
///
/// Prefer the compiler-provided __UINT64_TYPE__: it is exactly 64 bits wide on
/// every target (plain `unsigned long` is only 32 bits on LLP64 platforms) and
/// it names the same type <stdint.h> uses, so including both headers does not
/// produce conflicting typedefs. The original spelling is kept as a fallback
/// for compilers that do not define the macro.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/// OpenMP runtime queries that the host fallback implementations of
/// ompx_block_dim and ompx_thread_id call; they are declared here directly.
int omp_get_team_size(int);
int omp_get_ancestor_thread_num(int);

#if defined(__cplusplus)
} // extern "C"
#endif

/// Target kernel language extensions
///
/// These extensions also exist on the host so that fallback implementations
/// are possible; however, they cannot be arbitrarily composed with OpenMP. If
/// the rules of the kernel language are followed, the host fallbacks should
/// behave as expected, since the kernel is represented as three sequential
/// outer loops, one for each grid dimension, and three (nested) parallel
/// loops, one for each block dimension. This fallback is not supposed to be
/// optimal and should be configurable by the user.
///
///{

#ifdef __cplusplus
extern "C" {
#endif

/// Memory orderings that can be passed as the `Ordering` argument of the
/// synchronization functions. Each enumerator has the value of the compiler's
/// corresponding __ATOMIC_* constant.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_acquire = __ATOMIC_ACQUIRE,
  /// Misspelled legacy name of ompx_acquire, kept for source compatibility.
  ompx_aquire = ompx_acquire,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};

/// Selectors for the `Dim` argument of the grid query functions; x, y and z
/// are numbered consecutively starting at zero.
enum {
  ompx_dim_x = 0,
  ompx_dim_y,
  ompx_dim_z,
};

// TODO: The following implementation is for host fallback. We need to disable
// generation of host fallback in kernel language mode.
#pragma omp begin declare variant match(device = {kind(cpu)})

/// ompx_{thread_id,block_dim,block_id,grid_dim}
///
/// Host fallbacks of the grid queries. The thread id and the block size of
/// dimension `Dim` are read from the OpenMP runtime at level `Dim + 1`; the
/// block id is always 0 and the grid size is always 1.
///{
static inline int ompx_thread_id(int Dim) {
  return omp_get_ancestor_thread_num(Dim + 1);
}

static inline int ompx_block_dim(int Dim) {
  return omp_get_team_size(Dim + 1);
}

static inline int ompx_block_id(int Dim) {
  (void)Dim; // Same answer for every dimension.
  return 0;
}

static inline int ompx_grid_dim(int Dim) {
  (void)Dim; // Same answer for every dimension.
  return 1;
}
///}

/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Host fallbacks of the block synchronization functions. All three end up in
/// the same OpenMP barrier; the `Ordering` value is forwarded but never
/// inspected.
///{
static inline void ompx_sync_block(int Ordering) {
  (void)Ordering;
#pragma omp barrier
}

static inline void ompx_sync_block_acq_rel(void) {
  ompx_sync_block(ompx_acq_rel);
}

static inline void ompx_sync_block_divergent(int Ordering) {
  ompx_sync_block(Ordering);
}
///}

/// Host fallback of ompx_ballot_sync. It never returns a value: the body
/// consists solely of a trap, so any call aborts execution.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  (void)mask;
  (void)pred;
  __builtin_trap();
}

/// ompx_shfl_down_sync_{i,f,l,d}
///
/// Host fallbacks of the shuffle-down functions, one per value type (int,
/// float, long, double). Each body is only a trap, so any call aborts
/// execution.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(VALTY, SUFFIX)           \
  static inline VALTY ompx_shfl_down_sync_##SUFFIX(                            \
      uint64_t mask, VALTY var, unsigned delta, int width) {                   \
    __builtin_trap();                                                          \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}

#pragma omp end declare variant

/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Declarations of the block synchronization entry points. The `_acq_rel`
/// variant takes no argument; the other two take the memory ordering.
///{
void ompx_sync_block(int Ordering);
void ompx_sync_block_acq_rel(void);
void ompx_sync_block_divergent(int Ordering);
///}

/// ompx_{thread_id,block_dim,block_id,grid_dim}{,_x,_y,_z}
///
/// For every grid query NAME this declares `int ompx_NAME(int Dim)` and
/// defines the convenience helpers ompx_NAME_x/_y/_z, which call it with the
/// matching ompx_dim_* selector.
///
/// The helpers use an explicit `(void)` parameter list: in C an empty `()`
/// declares a function without a prototype, so calls with stray arguments
/// would not be diagnosed. In C++ both spellings mean the same.
///{
#define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME)                                 \
  int ompx_##NAME(int Dim);                                                    \
  static inline int ompx_##NAME##_x(void) { return ompx_##NAME(ompx_dim_x); }  \
  static inline int ompx_##NAME##_y(void) { return ompx_##NAME(ompx_dim_y); }  \
  static inline int ompx_##NAME##_z(void) { return ompx_##NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
_TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
///}

/// Declaration of the ballot entry point; takes a 64-bit mask and a predicate
/// and returns a 64-bit value.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);

/// ompx_shfl_down_sync_{i,f,l,d}
///
/// Declarations of the shuffle-down entry points, one per value type: int
/// (`_i`), float (`_f`), long (`_l`) and double (`_d`).
///{
int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width);
float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
                            int width);
long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width);
double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
                             int width);
///}

#ifdef __cplusplus
}
#endif

#ifdef __cplusplus

namespace ompx {

/// Dimension selectors; same values as the C enumerators ompx_dim_{x,y,z}.
enum {
  dim_x = ompx_dim_x,
  dim_y = ompx_dim_y,
  dim_z = ompx_dim_z,
};

/// Memory orderings; same values as the C-level ordering enumerators.
///
/// `acquire` and `acq_rel` are the correctly spelled names. The misspelled
/// `aquire` and `acc_rel` are kept as aliases so existing code still compiles.
enum {
  relaxed = ompx_relaxed,
  acquire = ompx_aquire,
  aquire = ompx_aquire, // Legacy spelling of `acquire`.
  release = ompx_release,
  acq_rel = ompx_acq_rel,
  acc_rel = ompx_acq_rel, // Legacy spelling of `acq_rel`.
  seq_cst = ompx_seq_cst,
};

/// ompx::{thread,block}_{id,dim}_{,x,y,z}
///
/// Thin C++ wrappers: NAME(Dim) forwards to the C function ompx_NAME(Dim), and
/// NAME_x/_y/_z call NAME with the matching dimension selector.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME)                          \
  static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); }        \
  static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); }           \
  static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); }           \
  static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
///}

/// ompx::sync_block{,_divergent}
///
/// Wrappers around the C synchronization functions of the same name. The
/// ordering argument defaults to acquire-release.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS)  \
  static inline RETTY NAME(ARGS) {                                             \
    return ompx_##NAME(CALL_ARGS);                                             \
  }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block,
                                        int Ordering = acq_rel, Ordering)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
                                        int Ordering = acq_rel, Ordering)
#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
///}

/// Wrapper that forwards both arguments to the C function ompx_ballot_sync
/// and returns its result.
static inline uint64_t ballot_sync(uint64_t mask, int pred) {
  return ompx_ballot_sync(mask, pred);
}

/// ompx::shfl_down_sync
///
/// One overload per value type (int, float, long, double), each forwarding to
/// the C function with the matching suffix. `width` defaults to __WARP_SIZE.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
                                    int width = __WARP_SIZE) {                 \
    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
///}

} // namespace ompx
#endif

///}

#endif /* __OMPX_H */