AMDGPUAtomicOptimizer.cpp | Explore in Territory

//===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass optimizes atomic operations by using a single lane of a wavefront
/// to perform the atomic operation, thus reducing contention on that memory
/// location.
/// Atomic optimizer uses following strategies to compute scan and reduced
/// values
/// 1. DPP -
///   This is the most efficient implementation for scan. DPP uses Whole Wave
///   Mode (WWM)
/// 2. Iterative -
//    An alternative implementation iterates over all active lanes
///   of Wavefront using llvm.cttz and performs scan  using readlane & writelane
///   intrinsics
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#define DEBUG_TYPE …

usingnamespacellvm;
usingnamespacellvm::AMDGPU;

namespace {

struct ReplacementInfo { … };

class AMDGPUAtomicOptimizer : public FunctionPass { … };

class AMDGPUAtomicOptimizerImpl
    : public InstVisitor<AMDGPUAtomicOptimizerImpl> { … };

} // namespace

char AMDGPUAtomicOptimizer::ID = …;

char &llvm::AMDGPUAtomicOptimizerID = …;

bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) { … }

PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F,
                                                 FunctionAnalysisManager &AM) { … }

bool AMDGPUAtomicOptimizerImpl::run(Function &F) { … }

static bool isLegalCrossLaneType(Type *Ty) { … }

void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) { … }

void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) { … }

// Use the builder to create the non-atomic counterpart of the specified
// atomicrmw binary op.
static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
                                  Value *LHS, Value *RHS) { … }

// Use the builder to create a reduction of V across the wavefront, with all
// lanes active, returning the same result in all lanes.
Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
                                                 AtomicRMWInst::BinOp Op,
                                                 Value *V,
                                                 Value *const Identity) const { … }

// Use the builder to create an inclusive scan of V across the wavefront, with
// all lanes active.
Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
                                            AtomicRMWInst::BinOp Op, Value *V,
                                            Value *Identity) const { … }

// Use the builder to create a shift right of V across the wavefront, with all
// lanes active, to turn an inclusive scan into an exclusive scan.
Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
                                                  Value *Identity) const { … }

// Use the builder to create an exclusive scan and compute the final reduced
// value using an iterative approach. This provides an alternative
// implementation to DPP which uses WMM for scan computations. This API iterate
// over active lanes to read, compute and update the value using
// readlane and writelane intrinsics.
std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
    IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *const Identity, Value *V,
    Instruction &I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const { … }

static Constant *getIdentityValueForAtomicOp(Type *const Ty,
                                             AtomicRMWInst::BinOp Op) { … }

static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) { … }

void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
                                               AtomicRMWInst::BinOp Op,
                                               unsigned ValIdx,
                                               bool ValDivergent) const { … }

INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
                      "AMDGPU atomic optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
                    "AMDGPU atomic optimizations", false, false)

FunctionPass *llvm::createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy) { … }
llvm/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp