//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This provides a class for CUDA code generation targeting the NVIDIA CUDA // runtime library. // //===----------------------------------------------------------------------===// #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/Basic/Cuda.h" #include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/StringRef.h" #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/ReplaceConstant.h" #include "llvm/Support/Format.h" #include "llvm/Support/VirtualFileSystem.h" usingnamespaceclang; usingnamespaceCodeGen; namespace { constexpr unsigned CudaFatMagic = …; constexpr unsigned HIPFatMagic = …; // "HIPF" class CGNVCUDARuntime : public CGCUDARuntime { … }; } // end anonymous namespace std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const { … } std::string CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { … } static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) { … } CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : … { … } llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { … } llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { … } llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const { … } llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const { … } llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { … } std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { … } void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { … } /// CUDA passes the arguments with a level of indirection. For example, a /// (void*, short, void*) is passed as {void **, short *, void **} to the launch /// function. For the LLVM/offload launch we flatten the arguments into the /// struct directly. In addition, we include the size of the arguments, thus /// pass {sizeof({void *, short, void *}), ptr to {void *, short, void *}, /// nullptr}. The last nullptr needs to be initialized to an array of pointers /// pointing to the arguments if we want to offload to the host. Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF, FunctionArgList &Args) { … } Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args) { … } // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local // array and kernels are launched using cudaLaunchKernel(). void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args) { … } void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args) { … } // Replace the original variable Var with the address loaded from variable // ManagedVar populated by HIP runtime. static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar) { … } /// Creates a function that sets up state on the host side for CUDA objects that /// have a presence on both the host and device sides. Specifically, registers /// the host side of kernel functions and device global variables with the CUDA /// runtime. /// \code /// void __cuda_register_globals(void** GpuBinaryHandle) { /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...); /// ... /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...); /// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...); /// ... /// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...); /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { … } /// Creates a global constructor function for the module: /// /// For CUDA: /// \code /// void __cuda_module_ctor() { /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob); /// __cuda_register_globals(Handle); /// } /// \endcode /// /// For HIP: /// \code /// void __hip_module_ctor() { /// if (__hip_gpubin_handle == 0) { /// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob); /// __hip_register_globals(__hip_gpubin_handle); /// } /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { … } /// Creates a global destructor function that unregisters the GPU code blob /// registered by constructor. /// /// For CUDA: /// \code /// void __cuda_module_dtor() { /// __cudaUnregisterFatBinary(Handle); /// } /// \endcode /// /// For HIP: /// \code /// void __hip_module_dtor() { /// if (__hip_gpubin_handle) { /// __hipUnregisterFatBinary(__hip_gpubin_handle); /// __hip_gpubin_handle = 0; /// } /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { … } CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { … } void CGNVCUDARuntime::internalizeDeviceSideVar( const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) { … } void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D, llvm::GlobalVariable &GV) { … } // Transform managed variables to pointers to managed variables in device code. // Each use of the original managed variable is replaced by a load from the // transformed managed variable. The transformed managed variable contains // the address of managed memory which will be allocated by the runtime. void CGNVCUDARuntime::transformManagedVars() { … } // Creates offloading entries for all the kernels and globals that must be // registered. The linker will provide a pointer to this section so we can // register the symbols with the linked device image. void CGNVCUDARuntime::createOffloadingEntries() { … } // Returns module constructor to be added. llvm::Function *CGNVCUDARuntime::finalizeModule() { … } llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { … }