//===-- AMDGPUSwLowerLDS.cpp -----------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass lowers the local data store, LDS, uses in kernel and non-kernel // functions in module to use dynamically allocated global memory. // Packed LDS Layout is emulated in the global memory. // The lowered memory instructions from LDS to global memory are then // instrumented for address sanitizer, to catch addressing errors. // This pass only works when address sanitizer has been enabled and has // instrumented the IR. It identifies that IR has been instrumented using // the "nosanitize_address" module flag. // // Replacement of Kernel LDS accesses: // For a kernel, LDS access can be static or dynamic which are direct // (accessed within kernel) and indirect (accessed through non-kernels). // All these LDS accesses corresponding to kernel will be packed together, // where all static LDS accesses will be allocated first and then dynamic // LDS follows. The total size with alignment is calculated. A new LDS global // will be created for the kernel called "SW LDS" and it will have the // attribute "amdgpu-lds-size" attached with value of the size calculated. // All the LDS accesses in the module will be replaced by GEP with offset // into the "SW LDS". // A new "llvm.amdgcn.<kernel>.dynlds" is created per kernel accessing // the dynamic LDS. This will be marked used by kernel and will have // MD_absolute_symbol metadata set to total static LDS size, since dynamic // LDS allocation starts after all static LDS allocation. // // A device global memory equal to the total LDS size will be allocated.
// At the prologue of the kernel, a single work-item from the // work-group does a "malloc" and stores the pointer of the // allocation in "SW LDS". // // To store the offsets corresponding to all LDS accesses, another global // variable is created which will be called "SW LDS metadata" in this pass. // - SW LDS Global: // It is an LDS global of ptr type with name // "llvm.amdgcn.sw.lds.<kernel-name>". // - Metadata Global: // It is of struct type, with n members. n equals the number of LDS // globals accessed by the kernel (direct and indirect). Each member of // struct is another struct of type {i32, i32, i32}. First member // corresponds to offset, second member corresponds to size of LDS global // being replaced and third represents the total aligned size. It will // have name "llvm.amdgcn.sw.lds.<kernel-name>.md". This global will have // an initializer with static LDS related offsets and sizes initialized. // But for dynamic LDS related entries, offsets will be initialized to // previous static LDS allocation end offset. Sizes for them will be zero // initially. These dynamic LDS offset and size values will be updated // within the kernel, since kernel can read the dynamic LDS size // allocation done at runtime with query to "hidden_dynamic_lds_size" // hidden kernel argument. // // At the epilogue of kernel, allocated memory would be made free by the same // single work-item. // // Replacement of non-kernel LDS accesses: // Multiple kernels can access the same non-kernel function. // All the kernels accessing LDS through non-kernels are sorted and // assigned a kernel-id. All the LDS globals accessed by non-kernels // are sorted. This information is used to build two tables: // - Base table: // Base table will have single row, with elements of the row // placed as per kernel ID. Each element in the row corresponds // to ptr of "SW LDS" variable created for that kernel. // - Offset table: // Offset table will have multiple rows and columns.
// Rows are assumed to be from 0 to (n-1). n is total number // of kernels accessing the LDS through non-kernels. // Each row will have m elements. m is the total number of // unique LDS globals accessed by all non-kernels. // Each element in the row corresponds to the ptr of // the replacement of LDS global done by that particular kernel. // An LDS variable in non-kernel will be replaced based on the information // from base and offset tables. Based on kernel-id query, ptr of "SW // LDS" for that corresponding kernel is obtained from base table. // The Offset into the base "SW LDS" is obtained from // corresponding element in offset table. With this information, replacement // value is obtained. //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUAsanInstrumentation.h" #include "AMDGPUMemoryUtils.h" #include "AMDGPUTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/ReplaceConstant.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #define DEBUG_TYPE … #define COV5_HIDDEN_DYN_LDS_SIZE_ARG … usingnamespacellvm; usingnamespaceAMDGPU; namespace { cl::opt<bool> AsanInstrumentLDS("amdgpu-asan-instrument-lds", cl::desc("Run asan instrumentation on LDS instructions "
"lowered to global memory"), cl::init(true), cl::Hidden); DomTreeCallback; struct LDSAccessTypeInfo { … }; // Struct to hold all the Metadata required for a kernel // to replace a LDS global uses with corresponding offset // in to device global memory. struct KernelLDSParameters { … }; // Struct to store information for creation of offset table // for all the non-kernel LDS accesses. struct NonKernelLDSParameters { … }; struct AsanInstrumentInfo { … }; struct FunctionsAndLDSAccess { … }; class AMDGPUSwLowerLDS { … }; template <typename T> SetVector<T> sortByName(std::vector<T> &&V) { … } SetVector<GlobalVariable *> AMDGPUSwLowerLDS::getOrderedNonKernelAllLDSGlobals( SetVector<GlobalVariable *> &Variables) { … } SetVector<Function *> AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels( SetVector<Function *> &Kernels) { … } void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) { … } void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() { … } static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV, uint32_t Address) { … } static void addLDSSizeAttribute(Function *Func, uint32_t Offset, bool IsDynLDS) { … } static void markUsedByKernel(Function *Func, GlobalVariable *SGV) { … } void AMDGPUSwLowerLDS::buildSwLDSGlobal(Function *Func) { … } void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) { … } void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(Function *Func) { … } void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) { … } void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(Function *Func) { … } static void replacesUsesOfGlobalInFunction(Function *Func, GlobalVariable *GV, Value *Replacement) { … } void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) { … } void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS( Function *Func, Value **CurrMallocSize, Value *HiddenDynLDSSize, SetVector<GlobalVariable *> &DynamicLDSGlobals) { … } static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, 
DISubprogram *SP) { … } void AMDGPUSwLowerLDS::getLDSMemoryInstructions( Function *Func, SetVector<Instruction *> &LDSInstructions) { … } Value * AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr, Value *LDSPtr) { … } void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory( Function *Func, Value *LoadMallocPtr, SetVector<Instruction *> &LDSInstructions) { … } void AMDGPUSwLowerLDS::poisonRedzones(Function *Func, Value *MallocPtr) { … } void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func, DomTreeUpdater &DTU) { … } Constant *AMDGPUSwLowerLDS::getAddressesOfVariablesInKernel( Function *Func, SetVector<GlobalVariable *> &Variables) { … } void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable( NonKernelLDSParameters &NKLDSParams) { … } void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable( NonKernelLDSParameters &NKLDSParams) { … } void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses( Function *Func, SetVector<GlobalVariable *> &LDSGlobals, NonKernelLDSParameters &NKLDSParams) { … } static void reorderStaticDynamicIndirectLDSSet(KernelLDSParameters &LDSParams) { … } void AMDGPUSwLowerLDS::initAsanInfo() { … } bool AMDGPUSwLowerLDS::run() { … } class AMDGPUSwLowerLDSLegacy : public ModulePass { … }; } // namespace char AMDGPUSwLowerLDSLegacy::ID = …; char &llvm::AMDGPUSwLowerLDSLegacyPassID = …; INITIALIZE_PASS_BEGIN(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds", "AMDGPU Software lowering of LDS", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds", "AMDGPU Software lowering of LDS", false, false) bool AMDGPUSwLowerLDSLegacy::runOnModule(Module &M) { … } ModulePass * llvm::createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM) { … } PreservedAnalyses AMDGPUSwLowerLDSPass::run(Module &M, ModuleAnalysisManager &AM) { … }