// llvm/clang/test/CodeGenCUDA/host-used-extern.cu

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - -target-cpu gfx906 | FileCheck %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
// RUN:   | FileCheck -check-prefix=NEG %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN:   -std=c++11 -emit-llvm -o - -target-cpu gfx906 \
// RUN:   | FileCheck -check-prefixes=NEG,NORDC %s

// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -x hip %s \
// RUN:   -fgpu-rdc -std=c++11 -emit-llvm -o - \
// RUN:   | FileCheck -check-prefix=HOST-NEG %s


#include "Inputs/cuda.h"

// CHECK-LABEL: @__clang_gpu_used_external = internal {{.*}}global
// CHECK-DAG: @_Z7kernel1v
// CHECK-DAG: @_Z7kernel4v
// CHECK-DAG: @var1
// CHECK-LABEL: @llvm.compiler.used = {{.*}} @__clang_gpu_used_external

// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @shvar
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel1v
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel4v
// NORDC-NOT: @__clang_gpu_used_external = {{.*}} @var1
// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @ext_shvar
// HOST-NEG-NOT: call void @__hipRegisterVar({{.*}}, ptr @shvar
// kernel1 is only declared in this file and is launched by the host function
// test(), so the positive checks expect it in @__clang_gpu_used_external.
__global__ void kernel1();

// kernel2 is not marked as used since it is a definition.
__global__ void kernel2() {}

// kernel3 is not marked as used since it is not called by host function.
__global__ void kernel3();

// kernel4 is marked as used even though it is not called: the host function
// test() takes its address.
__global__ void kernel4();

// kernel5 is not marked as used since it is called by host function
// with weak_odr linkage, which may be dropped by linker.
// The only caller in this file is the template_caller function template.
__global__ void kernel5();

// var1 is an extern device variable whose address is taken by the host
// function test(); the positive checks expect it in
// @__clang_gpu_used_external.
extern __device__ int var1;

// var2 is a device variable defined (not merely declared) in this file; the
// negative checks expect it to be absent from @__clang_gpu_used_external.
__device__ int var2;

// var3 is an extern device variable that no function in this test references;
// the negative checks expect it to be absent from
// @__clang_gpu_used_external.
extern __device__ int var3;

// Host function declared without a body; test() passes it the address of
// var1.
void use(int *p);

// Host function that references the externally defined device symbols the
// positive checks look for: kernel1, kernel4 and var1.
void test() {
  // Launch of a kernel that is declared but not defined in this file.
  kernel1<<<1, 1>>>();
  // kernel4 is never launched; only its address is taken here.
  void *p = (void*)kernel4;
  // The address of the extern device variable is handed to host code.
  use(&var1);
}

// Kernel whose lambda captures two __shared__ arrays by reference: the
// unsized extern array ext_shvar and the fixed-size array shvar. The negative
// checks expect neither name in @__clang_gpu_used_external on the device side,
// and the host-side checks expect neither to be passed to __hipRegisterVar.
__global__ void test_lambda_using_extern_shared() {
  extern __shared__ int ext_shvar[];
  __shared__ int shvar[10];
  // [&] makes the lambda body refer to both shared arrays declared above.
  auto lambda = [&]() {
    ext_shvar[0] = 1;
    shvar[0] = 2;
  };
  lambda();
}

// Host function template that launches kernel5 and assigns to var1. The
// negative checks expect kernel5 to stay out of @__clang_gpu_used_external
// even though it is launched here; the comment on the kernel5 declaration
// gives the reason.
template<class T>
void template_caller() {
  kernel5<<<1, 1>>>();
  var1 = 1;
}

// Explicit instantiation of template_caller for T = int.
template void template_caller<int>();