CGCUDANV.cpp | Explore in Territory

//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//

#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Cuda.h"
#include "clang/CodeGen/CodeGenABITypes.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/VirtualFileSystem.h"

usingnamespaceclang;
usingnamespaceCodeGen;

namespace {
constexpr unsigned CudaFatMagic = …;
constexpr unsigned HIPFatMagic = …; // "HIPF"

class CGNVCUDARuntime : public CGCUDARuntime { … };

} // end anonymous namespace

std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const { … }
std::string
CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { … }

static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) { … }

CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
    : … { … }

llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { … }

llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { … }

llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const { … }

llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const { … }

llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { … }

std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { … }

void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
                                     FunctionArgList &Args) { … }

/// CUDA passes the arguments with a level of indirection. For example, a
/// (void*, short, void*) is passed as {void **, short *, void **} to the launch
/// function. For the LLVM/offload launch we flatten the arguments into the
/// struct directly. In addition, we include the size of the arguments, thus
/// pass {sizeof({void *, short, void *}), ptr to {void *, short, void *},
/// nullptr}. The last nullptr needs to be initialized to an array of pointers
/// pointing to the arguments if we want to offload to the host.
Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
                                                      FunctionArgList &Args) { … }

Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF,
                                           FunctionArgList &Args) { … }

// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
// array and kernels are launched using cudaLaunchKernel().
void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
                                            FunctionArgList &Args) { … }

void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
                                               FunctionArgList &Args) { … }

// Replace the original variable Var with the address loaded from variable
// ManagedVar populated by HIP runtime.
static void replaceManagedVar(llvm::GlobalVariable *Var,
                              llvm::GlobalVariable *ManagedVar) { … }

/// Creates a function that sets up state on the host side for CUDA objects that
/// have a presence on both the host and device sides. Specifically, registers
/// the host side of kernel functions and device global variables with the CUDA
/// runtime.
/// \code
/// void __cuda_register_globals(void** GpuBinaryHandle) {
///    __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
///    ...
///    __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
///    __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
///    ...
///    __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { … }

/// Creates a global constructor function for the module:
///
/// For CUDA:
/// \code
/// void __cuda_module_ctor() {
///     Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
///     __cuda_register_globals(Handle);
/// }
/// \endcode
///
/// For HIP:
/// \code
/// void __hip_module_ctor() {
///     if (__hip_gpubin_handle == 0) {
///         __hip_gpubin_handle  = __hipRegisterFatBinary(GpuBinaryBlob);
///         __hip_register_globals(__hip_gpubin_handle);
///     }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { … }

/// Creates a global destructor function that unregisters the GPU code blob
/// registered by constructor.
///
/// For CUDA:
/// \code
/// void __cuda_module_dtor() {
///     __cudaUnregisterFatBinary(Handle);
/// }
/// \endcode
///
/// For HIP:
/// \code
/// void __hip_module_dtor() {
///     if (__hip_gpubin_handle) {
///         __hipUnregisterFatBinary(__hip_gpubin_handle);
///         __hip_gpubin_handle = 0;
///     }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { … }

CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { … }

void CGNVCUDARuntime::internalizeDeviceSideVar(
    const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) { … }

void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
                                            llvm::GlobalVariable &GV) { … }

// Transform managed variables to pointers to managed variables in device code.
// Each use of the original managed variable is replaced by a load from the
// transformed managed variable. The transformed managed variable contains
// the address of managed memory which will be allocated by the runtime.
void CGNVCUDARuntime::transformManagedVars() { … }

// Creates offloading entries for all the kernels and globals that must be
// registered. The linker will provide a pointer to this section so we can
// register the symbols with the linked device image.
void CGNVCUDARuntime::createOffloadingEntries() { … }

// Returns module constructor to be added.
llvm::Function *CGNVCUDARuntime::finalizeModule() { … }

llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
                                                    GlobalDecl GD) { … }
llvm/clang/lib/CodeGen/CGCUDANV.cpp