//===- InferAddressSpaces.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called) specific address spaces to represent
// memory spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two
// complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address
// space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join
// operation of two different specific address spaces pushes the expression
// down to the generic address space. The analysis completes once it reaches a
// fixed point.
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this
// complication, we break these cycles using "poison" placeholders. When
// converting an instruction `I` to a new address space, if its operand `Op`
// is not converted yet, we let `I` temporarily use `poison` and fix all the
// uses later. For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, poison ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the poison in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

#define DEBUG_TYPE …

using namespace llvm;

static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
    "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
    cl::desc("The default address space is assumed as the flat address space. "
             "This is mainly for testing purposes."));

static const unsigned UninitializedAddressSpace = …;
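// For illustration only: the lattice join described in the file header
// behaves roughly like the sketch below. This is *not* the pass's actual
// joinAddressSpaces member function (declared later in this file); the real
// one reads the target's flat address space from the pass state, whereas this
// hypothetical helper takes it as a parameter.
[[maybe_unused]] static unsigned
joinAddressSpacesSketch(unsigned AS1, unsigned AS2, unsigned FlatAddrSpace) {
  // Joining anything with the flat (generic) address space yields flat; it is
  // the bottom of the lattice.
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    return FlatAddrSpace;
  // Uninitialized is the top of the lattice and the identity of the join.
  if (AS1 == UninitializedAddressSpace)
    return AS2;
  if (AS2 == UninitializedAddressSpace)
    return AS1;
  // Two different specific address spaces join down to the flat one.
  return AS1 == AS2 ? AS1 : FlatAddrSpace;
}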
namespace {

using ValueToAddrSpaceMapTy = …;

// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
// addrspace is inferred on the *use* of a pointer. This map is introduced to
// infer addrspace from the addrspace predicate assumption built from the
// assume intrinsic. In that scenario, only specific uses (under a valid
// assumption context) could be inferred with a new addrspace.
using PredicatedAddrSpaceMapTy = …;
using PostorderStackTy = …;

class InferAddressSpaces : public FunctionPass { … };

class InferAddressSpacesImpl { … };

} // end anonymous namespace

char InferAddressSpaces::ID = …;

INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                    false, false)

static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) { … }

// Check whether that's a no-op pointer bitcast using a pair of
// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
// different address spaces.
static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) { … }

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                const TargetTransformInfo *TTI) { … }

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
                   const TargetTransformInfo *TTI) { … }

bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                      Value *OldV,
                                                      Value *NewV) const { … }

void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const { … }

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
    Value *V, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const { … }

// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { … }

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns poison in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreatePoison(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) { … }
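// For illustration only: the poison-placeholder technique described in the
// file header reduces to the pattern sketched below. This is not the body of
// operandWithNewAddressSpaceOrCreatePoison above (which also consults the
// predicated address-space map and handles vectors of pointers via
// getPtrOrVecOfPtrsWithNewAS); the helper name is hypothetical and the sketch
// assumes a scalar pointer operand.
[[maybe_unused]] static Value *
newOperandOrPoisonSketch(const Use &OperandUse, unsigned NewAddrSpace,
                         const ValueToValueMapTy &ValueWithNewAddrSpace,
                         SmallVectorImpl<const Use *> *PoisonUsesToFix) {
  Value *Operand = OperandUse.get();
  // If the operand has already been cloned into the new address space, use
  // the clone directly.
  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;
  // Otherwise, remember the use so the caller can patch it once the operand's
  // clone exists, and hand back a poison value of the new pointer type as a
  // placeholder.
  PoisonUsesToFix->push_back(&OperandUse);
  return PoisonValue::get(
      PointerType::get(Operand->getContext(), NewAddrSpace));
}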
// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand
// and adds that operand use to PoisonUsesToFix so that the caller can fix
// them later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns
// a Value* instead of an Instruction*.
//
// This may also return nullptr in case the instruction could not be
// rewritten.
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const { … }

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
    const TargetTransformInfo *TTI) { … }

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const { … }

// Defines the join operation on the address space lattice (see the file header
// comments).
unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
                                                   unsigned AS2) const { … }

bool InferAddressSpacesImpl::run(Function &CurFn) { … }

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const { … }

unsigned
InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
                                               const Value *UserCtx) const { … }

bool InferAddressSpacesImpl::updateAddressSpace(
    const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const { … }

/// Replace operand \p OpIdx in \p Inst, if the value is the same as \p OldVal,
/// with \p NewVal.
static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
                                 Value *OldVal, Value *NewVal) { … }

template <typename InstrType>
static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
                                    InstrType *MemInstr, unsigned AddrSpace,
                                    Value *OldV, Value *NewV) { … }

/// If \p OldV is used as the pointer operand of a compatible memory operation
/// \p Inst, replaces the pointer operand with NewV.
///
/// This covers memory instructions with a single pointer operand that can have
/// its address space changed by simply mutating the use to a new value.
///
/// \returns true if the user replacement was made.
static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
                                      User *Inst, unsigned AddrSpace,
                                      Value *OldV, Value *NewV) { … }

/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) { … }
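// For illustration only: for simple memory instructions such as loads, the
// rewrite performed by replaceSimplePointerUse amounts to mutating the single
// pointer operand in place, roughly as sketched below. This is not the real
// helper (which is templated over the instruction type and also checks with
// TargetTransformInfo that the target tolerates the new address space, e.g.
// for volatile accesses); the function name is hypothetical.
[[maybe_unused]] static bool replaceLoadPointerSketch(LoadInst *LI,
                                                      Value *OldV,
                                                      Value *NewV) {
  // Only rewrite if the load actually addresses memory through OldV.
  if (LI->getPointerOperand() != OldV)
    return false;
  // With opaque pointers, swapping in a pointer from a different address
  // space keeps the IR well formed; no bitcast is needed.
  LI->setOperand(LoadInst::getPointerOperandIndex(), NewV);
  return true;
}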
// \returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
                                                        unsigned NewAS) const { … }

static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) { … }

void InferAddressSpacesImpl::performPointerReplacement(
    Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<Instruction *> &DeadInstructions) const { … }

bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS) const { … }

bool InferAddressSpaces::runOnFunction(Function &F) { … }

FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) { … }

InferAddressSpacesPass::InferAddressSpacesPass() : … { … }

InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
    : … { … }

PreservedAnalyses InferAddressSpacesPass::run(Function &F,
                                              FunctionAnalysisManager &AM) { … }
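// Usage note (illustrative, assuming the usual new-pass-manager boilerplate
// for setting up the analysis managers): the pass can be scheduled from C++
// with
//   FunctionPassManager FPM;
//   FPM.addPass(InferAddressSpacesPass());
//   FPM.run(F, FAM);
// or invoked from the command line with `opt -passes=infer-address-spaces`.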