llvm/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

//===-- AMDGPUSwLowerLDS.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers the local data store, LDS, uses in kernel and non-kernel
// functions in module to use dynamically allocated global memory.
// Packed LDS Layout is emulated in the global memory.
// The lowered memory instructions from LDS to global memory are then
// instrumented for address sanitizer, to catch addressing errors.
// This pass only work when address sanitizer has been enabled and has
// instrumented the IR. It identifies that IR has been instrumented using
// "nosanitize_address" module flag.
//
// Replacement of Kernel LDS accesses:
//    For a kernel, LDS access can be static or dynamic which are direct
//    (accessed within kernel) and indirect (accessed through non-kernels).
//    All these LDS accesses corresponding to kernel will be packed together,
//    where all static LDS accesses will be allocated first and then dynamic
//    LDS follows. The total size with alignment is calculated. A new LDS global
//    will be created for the kernel called "SW LDS" and it will have the
//    attribute "amdgpu-lds-size" attached with value of the size calculated.
//    All the LDS accesses in the module will be replaced by GEP with offset
//    into the "Sw LDS".
//    A new "llvm.amdgcn.<kernel>.dynlds" is created per kernel accessing
//    the dynamic LDS. This will be marked used by kernel and will have
//    MD_absolue_symbol metadata set to total static LDS size, Since dynamic
//    LDS allocation starts after all static LDS allocation.
//
//    A device global memory equal to the total LDS size will be allocated.
//    At the prologue of the kernel, a single work-item from the
//    work-group, does a "malloc" and stores the pointer of the
//    allocation in "SW LDS".
//
//    To store the offsets corresponding to all LDS accesses, another global
//    variable is created which will be called "SW LDS metadata" in this pass.
//    - SW LDS Global:
//        It is LDS global of ptr type with name
//        "llvm.amdgcn.sw.lds.<kernel-name>".
//    - Metadata Global:
//        It is of struct type, with n members. n equals the number of LDS
//        globals accessed by the kernel(direct and indirect). Each member of
//        struct is another struct of type {i32, i32, i32}. First member
//        corresponds to offset, second member corresponds to size of LDS global
//        being replaced and third represents the total aligned size. It will
//        have name "llvm.amdgcn.sw.lds.<kernel-name>.md". This global will have
//        an intializer with static LDS related offsets and sizes initialized.
//        But for dynamic LDS related entries, offsets will be intialized to
//        previous static LDS allocation end offset. Sizes for them will be zero
//        initially. These dynamic LDS offset and size values will be updated
//        within the kernel, since kernel can read the dynamic LDS size
//        allocation done at runtime with query to "hidden_dynamic_lds_size"
//        hidden kernel argument.
//
//    At the epilogue of kernel, allocated memory would be made free by the same
//    single work-item.
//
// Replacement of non-kernel LDS accesses:
//    Multiple kernels can access the same non-kernel function.
//    All the kernels accessing LDS through non-kernels are sorted and
//    assigned a kernel-id. All the LDS globals accessed by non-kernels
//    are sorted. This information is used to build two tables:
//    - Base table:
//        Base table will have single row, with elements of the row
//        placed as per kernel ID. Each element in the row corresponds
//        to ptr of "SW LDS" variable created for that kernel.
//    - Offset table:
//        Offset table will have multiple rows and columns.
//        Rows are assumed to be from 0 to (n-1). n is total number
//        of kernels accessing the LDS through non-kernels.
//        Each row will have m elements. m is the total number of
//        unique LDS globals accessed by all non-kernels.
//        Each element in the row correspond to the ptr of
//        the replacement of LDS global done by that particular kernel.
//    A LDS variable in non-kernel will be replaced based on the information
//    from base and offset tables. Based on kernel-id query, ptr of "SW
//    LDS" for that corresponding kernel is obtained from base table.
//    The Offset into the base "SW LDS" is obtained from
//    corresponding element in offset table. With this information, replacement
//    value is obtained.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUAsanInstrumentation.h"
#include "AMDGPUMemoryUtils.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <algorithm>

#define DEBUG_TYPE
#define COV5_HIDDEN_DYN_LDS_SIZE_ARG

usingnamespacellvm;
usingnamespaceAMDGPU;

namespace {

cl::opt<bool>
    AsanInstrumentLDS("amdgpu-asan-instrument-lds",
                      cl::desc("Run asan instrumentation on LDS instructions "
                               "lowered to global memory"),
                      cl::init(true), cl::Hidden);

DomTreeCallback;

struct LDSAccessTypeInfo {};

// Struct to hold all the Metadata required for a kernel
// to replace a LDS global uses with corresponding offset
// in to device global memory.
struct KernelLDSParameters {};

// Struct to store information for creation of offset table
// for all the non-kernel LDS accesses.
struct NonKernelLDSParameters {};

struct AsanInstrumentInfo {};

struct FunctionsAndLDSAccess {};

class AMDGPUSwLowerLDS {};

template <typename T> SetVector<T> sortByName(std::vector<T> &&V) {}

SetVector<GlobalVariable *> AMDGPUSwLowerLDS::getOrderedNonKernelAllLDSGlobals(
    SetVector<GlobalVariable *> &Variables) {}

SetVector<Function *> AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels(
    SetVector<Function *> &Kernels) {}

void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {}

void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {}

static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV,
                                     uint32_t Address) {}

static void addLDSSizeAttribute(Function *Func, uint32_t Offset,
                                bool IsDynLDS) {}

static void markUsedByKernel(Function *Func, GlobalVariable *SGV) {}

void AMDGPUSwLowerLDS::buildSwLDSGlobal(Function *Func) {}

void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) {}

void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(Function *Func) {}

void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {}

void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(Function *Func) {}

static void replacesUsesOfGlobalInFunction(Function *Func, GlobalVariable *GV,
                                           Value *Replacement) {}

void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) {}

void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS(
    Function *Func, Value **CurrMallocSize, Value *HiddenDynLDSSize,
    SetVector<GlobalVariable *> &DynamicLDSGlobals) {}

static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
                                    DISubprogram *SP) {}

void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
    Function *Func, SetVector<Instruction *> &LDSInstructions) {}

Value *
AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
                                                           Value *LDSPtr) {}

void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
    Function *Func, Value *LoadMallocPtr,
    SetVector<Instruction *> &LDSInstructions) {}

void AMDGPUSwLowerLDS::poisonRedzones(Function *Func, Value *MallocPtr) {}

void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
                                              DomTreeUpdater &DTU) {}

Constant *AMDGPUSwLowerLDS::getAddressesOfVariablesInKernel(
    Function *Func, SetVector<GlobalVariable *> &Variables) {}

void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable(
    NonKernelLDSParameters &NKLDSParams) {}

void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
    NonKernelLDSParameters &NKLDSParams) {}

void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
    Function *Func, SetVector<GlobalVariable *> &LDSGlobals,
    NonKernelLDSParameters &NKLDSParams) {}

static void reorderStaticDynamicIndirectLDSSet(KernelLDSParameters &LDSParams) {}

void AMDGPUSwLowerLDS::initAsanInfo() {}

bool AMDGPUSwLowerLDS::run() {}

class AMDGPUSwLowerLDSLegacy : public ModulePass {};
} // namespace

char AMDGPUSwLowerLDSLegacy::ID =;
char &llvm::AMDGPUSwLowerLDSLegacyPassID =;

INITIALIZE_PASS_BEGIN(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds",
                      "AMDGPU Software lowering of LDS", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds",
                    "AMDGPU Software lowering of LDS", false, false)

bool AMDGPUSwLowerLDSLegacy::runOnModule(Module &M) {}

ModulePass *
llvm::createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM) {}

PreservedAnalyses AMDGPUSwLowerLDSPass::run(Module &M,
                                            ModuleAnalysisManager &AM) {}