llvm/offload/test/mapping/auto_zero_copy_globals.cpp

// clang-format off
// RUN: %libomptarget-compilexx-generic
// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK

// REQUIRES: amdgpu
// REQUIRES: unified_shared_memory

// clang-format on

#include <cstdint>
#include <cstdio>

/// Test for globals under automatic zero-copy.
/// Because we are building without unified_shared_memory
/// requirement pragma, all globals are allocated in the device
/// memory of all used GPUs. To ensure those globals contain the intended
/// values, we need to execute H2D and D2H memory copies even if we are running
/// in automatic zero-copy. This only applies to globals. Local variables (their
/// host pointers) are passed to the kernels by-value, according to the
/// automatic zero-copy behavior.

#pragma omp begin declare target
int32_t x;     // 4 bytes
int32_t z[10]; // 40 bytes
int32_t *k;    // 20 bytes
#pragma omp end declare target

int main() {
  int32_t *dev_k = nullptr;
  x = 3;
  int32_t y = -1;
  for (size_t t = 0; t < 10; t++)
    z[t] = t;
  k = new int32_t[5];

  printf("Host pointer for k = %p\n", k);
  for (size_t t = 0; t < 5; t++)
    k[t] = -t;

/// target update to forces a copy between host and device global, which we must
/// execute to keep the two global copies consistent. CHECK: Copying data from
/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
#pragma omp target update to(z[ : 10])

/// target map with always modifier (for x) forces a copy between host and
/// device global, which we must execute to keep the two global copies
/// consistent. k's content (host address) is passed by-value to the kernel
/// (Size=20 case). y, being a local variable, is also passed by-value to the
/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
/// TgtPtr={{.*}}, Size=4, Name=x
#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y)   \
    map(from : dev_k)
  {
    x++;
    y++;
    for (size_t t = 0; t < 10; t++)
      z[t]++;
    dev_k = k;
  }
/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=20, Name=k

/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=4, Name=x

/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
/// Size=40, Name=z
#pragma omp target update from(z[ : 10])

  /// CHECK-NOT: k pointer not correctly passed to kernel
  if (dev_k != k)
    printf("k pointer not correctly passed to kernel\n");

  delete[] k;
  return 0;
}