//===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // \file // This post-linking pass replaces the function pointer of an enqueued // block kernel with a global variable (runtime handle) and adds the // "runtime-handle" attribute to the enqueued block kernel. // // In LLVM CodeGen the runtime-handle metadata will be translated to // RuntimeHandle metadata in the code object. The runtime allocates a global buffer // for each kernel with RuntimeHandle metadata and saves the kernel address // required for the AQL packet into the buffer. The __enqueue_kernel function // in the device library knows that the invoke function pointer in the block // literal is actually a runtime handle, loads the kernel address from it, // and puts it into the AQL packet for dispatching. // // This cannot be done in FE since FE cannot create a unique global variable // with external linkage across LLVM modules. A global variable with internal // linkage does not work since optimization passes will try to replace loads // of the global variable with its initialization value. // // It also identifies the kernels that directly or indirectly enqueue kernels // and adds the "calls-enqueue-kernel" function attribute to them, which will // be used to determine whether to emit runtime metadata for the kernel // enqueue-related hidden kernel arguments. 
// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE … using namespace llvm; namespace { /// Lower enqueued blocks. class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass { … }; } // end anonymous namespace char AMDGPUOpenCLEnqueuedBlockLowering::ID = …; char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringID = …; INITIALIZE_PASS(…) ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { … } bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { … }