llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

//===- InferAddressSpaces.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __device__ and __shared__). Knowing the space of a memory access allows
// CUDA compilers to emit faster PTX loads and stores. For example, a load from
// shared memory can be translated to `ld.shared` which is roughly 10% faster
// than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero address spaces, the so-called specific address spaces,
// to represent memory spaces (e.g. addrspace(3) means shared memory). The
// Clang frontend places only type-qualified variables in specific address
// spaces, and then conservatively `addrspacecast`s each type-qualified
// variable to addrspace(0) (the so-called generic address space) for other
// instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use in %1 is
// routed through %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to prove that as many generic pointers as possible point to only
// one specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to handle cycles. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address space.
// The monotone transfer function moves the address space of a pointer down a
// lattice path from uninitialized to specific and then to generic. A join
// operation of two different specific address spaces pushes the expression down
// to the generic address space. The analysis completes once it reaches a fixed
// point.
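//
// For example, under this join (restating the lattice above; addrspace(1) is
// used here only as an illustrative second specific space):
//   join(uninitialized, addrspace(3)) = addrspace(3)
//   join(addrspace(3),  addrspace(3)) = addrspace(3)
//   join(addrspace(3),  addrspace(1)) = generic
// so a phi merging a shared pointer with, say, a global pointer is inferred
// generic.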
//
// Second, IR rewriting in Step 2 also needs to handle cycles. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "poison" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `poison` and fix all the uses later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, poison ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the poison in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

#define DEBUG_TYPE "infer-address-spaces"

using namespace llvm;

static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
    "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
    cl::desc("The default address space is assumed as the flat address space. "
             "This is mainly for test purpose."));

static const unsigned UninitializedAddressSpace =
    std::numeric_limits<unsigned>::max();

namespace {

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
// addrspace is inferred on the *use* of a pointer. This map is introduced to
// infer addrspace from the addrspace predicate assumptions built from the
// assume intrinsic. In that scenario, only specific uses (under a valid
// assumption context) can be inferred to have a new addrspace.
using PredicatedAddrSpaceMapTy =
    DenseMap<std::pair<const Value *, const Value *>, unsigned>;
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;

class InferAddressSpaces : public FunctionPass {};

class InferAddressSpacesImpl {};

} // end anonymous namespace

char InferAddressSpaces::ID = 0;

INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                    false, false)

static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {}
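// A minimal sketch of the body elided above, assuming the standard PointerType
// and Type APIs (not verbatim upstream code):
//   assert(Ty->isPtrOrPtrVectorTy());
//   PointerType *NPT = PointerType::get(Ty->getContext(), NewAddrSpace);
//   return Ty->getWithNewType(NPT); // preserves the vector shape, if any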

// Check whether the given pair of `ptrtoint`/`inttoptr` is a no-op pointer
// bitcast in disguise; such pairs exist because there is no no-op pointer
// bitcast over different address spaces.
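//
// For example (illustrative), if the target treats the cast as a no-op and the
// integer type is as wide as both pointer types, the pair
//   %i = ptrtoint ptr addrspace(3) %p to i64
//   %q = inttoptr i64 %i to ptr
// acts as `%q = addrspacecast ptr addrspace(3) %p to ptr`.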
static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) {}

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
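//
// A minimal sketch of the check, assuming only the operators named above and
// ignoring the pointer-typed restriction on phis (not verbatim upstream code):
//   if (const auto *Op = dyn_cast<Operator>(&V)) {
//     switch (Op->getOpcode()) {
//     case Instruction::PHI:
//     case Instruction::BitCast:
//     case Instruction::AddrSpaceCast:
//     case Instruction::GetElementPtr:
//       return true;
//     default:
//       return isNoopPtrIntCastPair(Op, DL, TTI);
//     }
//   }
//   return false;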
static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                const TargetTransformInfo *TTI) {}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
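//
// As a rough sketch, each supported operator contributes (assumed):
//   phi                     -> all incoming values
//   bitcast, addrspacecast  -> the single source operand
//   getelementptr           -> the pointer operand
//   inttoptr (no-op pair)   -> the pointer fed to the matching ptrtoint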
static SmallVector<Value *, 2>
getPointerOperands(const Value &V, const DataLayout &DL,
                   const TargetTransformInfo *TTI) {}

bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                      Value *OldV,
                                                      Value *NewV) const {}

void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const {}

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
    Value *V, PostorderStackTy &PostorderStack,
    DenseSet<Value *> &Visited) const {}

// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {}

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns poison in the new address space as a placeholder.
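//
// A minimal sketch of the core path, assuming the contract above and omitting
// the constant-operand and predicated-addrspace cases (not verbatim):
//   Value *Operand = OperandUse.get();
//   if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
//     return NewOperand; // the operand was already cloned into NewAddrSpace
//   PoisonUsesToFix->push_back(&OperandUse);
//   return PoisonValue::get(
//       getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAddrSpace));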
static Value *operandWithNewAddressSpaceOrCreatePoison(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) {}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, this function uses poison as a
// placeholder operand and adds that operand use to PoisonUsesToFix so that the
// caller can fix it later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
//
// This may also return nullptr in the case that the instruction could not be
// rewritten.
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {}

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
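//
// For example (illustrative, reusing @a from the file header), the constant
//   gep [10 x float], [10 x float]* addrspacecast
//       ([10 x float] addrspace(3)* @a to [10 x float]*), i64 0, i64 2
// becomes
//   gep [10 x float], [10 x float] addrspace(3)* @a, i64 0, i64 2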
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
    const TargetTransformInfo *TTI) {}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {}

// Defines the join operation on the address space lattice (see the file header
// comments).
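//
// A minimal sketch of the join, assuming the body elided below follows the
// lattice semantics described in the file header (not verbatim upstream code):
//   if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
//     return FlatAddrSpace;
//   if (AS1 == UninitializedAddressSpace)
//     return AS2;
//   if (AS2 == UninitializedAddressSpace)
//     return AS1;
//   // Two distinct specific address spaces join to the flat one.
//   return (AS1 == AS2) ? AS1 : FlatAddrSpace;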
unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
                                                   unsigned AS2) const {}

bool InferAddressSpacesImpl::run(Function &CurFn) {}

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {}

unsigned
InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
                                               const Value *UserCtx) const {}

bool InferAddressSpacesImpl::updateAddressSpace(
    const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {}

/// Replace operand \p OpIdx in \p Inst with \p NewVal, if the operand's
/// current value is \p OldVal.
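///
/// A minimal sketch of the documented behavior (assumed, not verbatim):
///   if (Inst->getOperand(OpIdx) != OldVal)
///     return false;
///   Inst->setOperand(OpIdx, NewVal);
///   return true;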
static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
                                 Value *OldVal, Value *NewVal) {}

template <typename InstrType>
static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
                                    InstrType *MemInstr, unsigned AddrSpace,
                                    Value *OldV, Value *NewV) {}

/// If \p OldV is used as the pointer operand of a compatible memory operation
/// \p Inst, replaces the pointer operand with \p NewV.
///
/// This covers memory instructions with a single pointer operand that can have
/// its address space changed by simply mutating the use to a new value.
///
/// \returns true if the replacement was made.
static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
                                      User *Inst, unsigned AddrSpace,
                                      Value *OldV, Value *NewV) {}

/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
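///
/// For example (illustrative), replacing the source pointer of a memcpy with a
/// shared-memory pointer re-mangles the intrinsic name along with the operand:
///   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)
/// becomes
///   call void @llvm.memcpy.p0.p3.i64(ptr %d, ptr addrspace(3) %s, i64 %n,
///                                    i1 false)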
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {}

// \returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
                                                        unsigned NewAS) const {}

static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {}

void InferAddressSpacesImpl::performPointerReplacement(
    Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<Instruction *> &DeadInstructions) const {}

bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS) const {}

bool InferAddressSpaces::runOnFunction(Function &F) {}

FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {}

InferAddressSpacesPass::InferAddressSpacesPass()
    : FlatAddrSpace(UninitializedAddressSpace) {}
InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
    : FlatAddrSpace(AddressSpace) {}

PreservedAnalyses InferAddressSpacesPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {}