NVPTXISelLowering.cpp | Explore in Territory

//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE …

usingnamespacellvm;

// File-local counter with static storage duration. It is atomic so that
// concurrent updates do not race, and it explicitly starts at zero.
// NOTE(review): presumably the source of the UniqueCallSite numbers handed to
// getPrototype(); the code that advances it is not visible here -- confirm.
static std::atomic<unsigned> GlobalUniqueCallSite{0};

// -nvptx-sched4reg: boolean flag, off by default.
// NOTE(review): the code that reads this flag is not visible in this view.
static cl::opt<bool>
    sched4reg("nvptx-sched4reg",
              cl::desc("NVPTX Specific: schedule for register pressure"),
              cl::init(false));

// -nvptx-fma-level: hidden unsigned option selecting how eagerly FMA
// contraction is performed. Per the help text: 0 disables it, 1 enables it,
// 2 enables it aggressively. The default is 2.
static cl::opt<unsigned> FMAContractLevelOpt(
    "nvptx-fma-level", cl::Hidden,
    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
             " 1: do it, 2: do it aggressively)"),
    cl::init(2));

// -nvptx-prec-divf32: hidden integer option selecting the f32 division
// flavour. Per the help text: 0 uses div.approx, 1 uses div.full, and 2 uses
// the IEEE-compliant div.rnd when available. The default is 2.
static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::Hidden,
    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),
    cl::init(2));

// -nvptx-prec-sqrtf32: hidden boolean option choosing between sqrt.approx
// (false) and sqrt.rn (true), per the help text. The default is true.
static cl::opt<bool> UsePrecSqrtF32(
    "nvptx-prec-sqrtf32", cl::Hidden,
    cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
    cl::init(true));

// -nvptx-force-min-byval-param-align: hidden boolean option that, per the help
// text, forces a 4-byte minimum alignment for byval parameters of device
// functions. Off by default.
static cl::opt<bool> ForceMinByValParamAlign(
    "nvptx-force-min-byval-param-align", cl::Hidden,
    cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
             " params of device functions."),
    cl::init(false));

int NVPTXTargetLowering::getDivF32Level() const { … }

bool NVPTXTargetLowering::usePrecSqrtF32() const { … }

bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { … }

static bool IsPTXVectorType(MVT VT) { … }

static bool Is16bitsType(MVT VT) { … }

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) { … }

/// PromoteScalarIntegerPTX
/// Used to make sure the arguments/returns are suitable for passing
/// and promote them to a larger size if they're not.
///
/// The promoted type is placed in \p PromotedVT if the function returns true.
static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) { … }

// Check whether we can merge loads/stores of some of the pieces of a
// flattened function parameter or return value into a single vector
// load/store.
//
// The flattened parameter is represented as a list of EVTs and
// offsets, and the whole structure is aligned to ParamAlignment. This
// function determines whether we can load/store pieces of the
// parameter starting at index Idx using a single vectorized op of
// size AccessSize. If so, it returns the number of param pieces
// covered by the vector op. Otherwise, it returns 1.
static unsigned CanMergeParamLoadStoresStartingAt(
    unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
    const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { … }

// Flags for tracking per-element vectorization state of loads/stores
// of a flattened function parameter or return value.
enum ParamVectorizationFlags { … };

// Computes whether and how we can vectorize the loads/stores of a
// flattened function parameter or return value.
//
// The flattened parameter is represented as the list of ValueVTs and
// Offsets, and is aligned to ParamAlignment bytes. We return a vector
// of the same size as ValueVTs indicating how each piece should be
// loaded/stored (i.e. as a scalar, or as part of a vector
// load/store).
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
                     const SmallVectorImpl<uint64_t> &Offsets,
                     Align ParamAlignment, bool IsVAArg = false) { … }

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : … { … }

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { … }

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { … }

SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                             int Enabled, int &ExtraSteps,
                                             bool &UseOneConst,
                                             bool Reciprocal) const { … }

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { … }

static bool IsTypePassedAsArray(const Type *Ty) { … }

std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
    std::optional<std::pair<unsigned, const APInt &>> VAInfo,
    const CallBase &CB, unsigned UniqueCallSite) const { … }

Align NVPTXTargetLowering::getFunctionArgumentAlignment(
    const Function *F, Type *Ty, unsigned Idx, const DataLayout &DL) const { … }

Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
                                                unsigned Idx,
                                                const DataLayout &DL) const { … }

static bool adjustElementType(EVT &ElementType) { … }

// Use byte-store when the param address of the argument value is unaligned.
// This may happen when the argument value is a field of a packed structure.
//
// This is called in LowerCall() when passing the param values.
static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain,
                                        uint64_t Offset, EVT ElementType,
                                        SDValue StVal, SDValue &InGlue,
                                        unsigned ArgID, const SDLoc &dl) { … }

// Use byte-load when the param address of the returned value is unaligned.
// This may happen when the returned value is a field of a packed structure.
static SDValue
LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
                           EVT ElementType, SDValue &InGlue,
                           SmallVectorImpl<SDValue> &TempProxyRegOps,
                           const SDLoc &dl) { … }

static bool shouldConvertToIndirectCall(const CallBase *CB,
                                        const GlobalAddressSDNode *Func) { … }

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const { … }

SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                     SelectionDAG &DAG) const { … }

// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector instead, just as LegalizeOp() does in
// LLVM 2.5.
SDValue
NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { … }

// We can init constant f16x2/v2i16/v4i8 with a single .b32 move.  Normally it
// would get lowered as two constant loads and vector-packing move.
// Instead we want just a constant move:
//        mov.b32         %r2, 0x40003C00
SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                 SelectionDAG &DAG) const { … }
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
///    amount, or
/// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
///    amount.
SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
                                                  SelectionDAG &DAG) const { … }

/// LowerShiftLeftParts - Lower SHL_PARTS, which
/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
///    amount, or
/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
///    amount.
SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const { … }

/// If the types match, convert the generic copysign to the NVPTXISD version,
/// otherwise bail, ensuring that mismatched cases are properly expanded.
SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
                                            SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { … }

// This is the rounding method used in CUDA libdevice, in C-like code:
// float roundf(float A)
// {
//   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
//   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
//   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
// }
SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
                                           SelectionDAG &DAG) const { … }

// The implementation of round(double) is similar to that of round(float) in
// that they both separate the value range into three regions and use a method
// specific to the region to round the values. However, round(double) first
// calculates the round of the absolute value and then adds the sign back while
// round(float) directly rounds the value with sign.
SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
                                           SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
                                            SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op,
                                           SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerFP_EXTEND(SDValue Op,
                                            SelectionDAG &DAG) const { … }

static SDValue LowerVectorArith(SDValue Op, SelectionDAG &DAG) { … }

SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { … }

// This will prevent AsmPrinter from trying to print the jump tables itself.
unsigned NVPTXTargetLowering::getJumpTableEncoding() const { … }

// This function is almost a copy of SelectionDAG::expandVAArg().
// The only diff is that this one produces loads from local address space.
SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { … }

// v = ld i1* addr
//   =>
// v1 = ld i8* addr (-> i16)
// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { … }

SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { … }

// st i1 v, addr
//    =>
// v1 = zxt v to i16
// st.u8 i16, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { … }

SDValue NVPTXTargetLowering::LowerCopyToReg_128(SDValue Op,
                                                SelectionDAG &DAG) const { … }

unsigned NVPTXTargetLowering::getNumRegisters(
    LLVMContext &Context, EVT VT,
    std::optional<MVT> RegisterVT = std::nullopt) const { … }

bool NVPTXTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { … }

// This creates target external symbol for a function parameter.
// Name of the symbol is composed from its index and the function name.
// Negative index corresponds to special parameter (unsized array) used for
// passing variable arguments.
SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
                                            EVT v) const { … }

SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { … }

// Use byte-store when the param address of the return value is unaligned.
// This may happen when the return value is a field of a packed structure.
static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain,
                                      uint64_t Offset, EVT ElementType,
                                      SDValue RetVal, const SDLoc &dl) { … }

SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &dl, SelectionDAG &DAG) const { … }

void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const { … }

static unsigned getOpcForTextureInstr(unsigned Intrinsic) { … }

static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { … }

// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer, in particular the address space
// information.
bool NVPTXTargetLowering::getTgtMemIntrinsic(
    IntrinsicInfo &Info, const CallInst &I,
    MachineFunction &MF, unsigned Intrinsic) const { … }

/// getFunctionParamOptimizedAlign - since function arguments are passed via
/// .param space, we may want to increase their alignment in a way that
/// ensures that we can effectively vectorize their loads & stores. We can
/// increase the alignment only if the function has internal or private
/// linkage; for other linkage types, callers may already rely on the default
/// alignment. To allow using 128-bit vectorized loads/stores, this function
/// ensures that alignment is 16 or greater.
Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
    const Function *F, Type *ArgTy, const DataLayout &DL) const { … }

/// Helper for computing alignment of a device function byval parameter.
Align NVPTXTargetLowering::getFunctionByValParamAlign(
    const Function *F, Type *ArgTy, Align InitialAlign,
    const DataLayout &DL) const { … }

// Helper for getting a function parameter name. Name is composed from
// its index and the function name. Negative index corresponds to special
// parameter (unsized array) used for passing variable arguments.
std::string NVPTXTargetLowering::getParamName(const Function *F,
                                              int Idx) const { … }

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS, Instruction *I) const { … }

//===----------------------------------------------------------------------===//
//                         NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { … }

std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const { … }

//===----------------------------------------------------------------------===//
//                         NVPTX DAG Combining
//===----------------------------------------------------------------------===//

bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
                                   CodeGenOptLevel OptLevel) const { … }

bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { … }

static bool isConstZero(const SDValue &Operand) { … }

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1.  This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue
PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                              TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue
PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                               TargetLowering::DAGCombinerInfo &DCI,
                               CodeGenOptLevel OptLevel) { … }

static SDValue PerformStoreCombineHelper(SDNode *N, std::size_t Front,
                                         std::size_t Back) { … }

static SDValue PerformStoreParamCombine(SDNode *N) { … }

static SDValue PerformStoreRetvalCombine(SDNode *N) { … }

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOptLevel OptLevel) { … }

/// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
///
static SDValue PerformFADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOptLevel OptLevel) { … }

static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue PerformREMCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOptLevel OptLevel) { … }

enum OperandSignedness { … };

/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
/// that can be demoted to \p OptSize bits without loss of information. The
/// signedness of the operand, if determinable, is placed in \p S.
static bool IsMulWideOperandDemotable(SDValue Op,
                                      unsigned OptSize,
                                      OperandSignedness &S) { … }

/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
/// be demoted to \p OptSize bits without loss of information. If the operands
/// contain a constant, it should appear as the RHS operand. The signedness of
/// the operands is placed in \p IsSigned.
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
                                        unsigned OptSize,
                                        bool &IsSigned) { … }

/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
/// amount.
static SDValue TryMULWIDECombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) { … }

static bool isConstOne(const SDValue &Operand) { … }

static SDValue matchMADConstOnePattern(SDValue Add) { … }

static SDValue combineMADConstOne(SDValue X, SDValue Add, EVT VT, SDLoc DL,
                                  TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue combineMulSelectConstOne(SDValue X, SDValue Select, EVT VT,
                                        SDLoc DL,
                                        TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue
PerformMULCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                              TargetLowering::DAGCombinerInfo &DCI) { … }

/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOptLevel OptLevel) { … }

/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOptLevel OptLevel) { … }

static SDValue PerformSETCCCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   unsigned int SmVersion) { … }

static SDValue PerformEXTRACTCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue PerformVSELECTCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) { … }

static SDValue PerformLOADCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) { … }

SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const { … }

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Results) { … }

static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) { … }

static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &Results) { … }

void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { … }

NVPTXTargetLowering::AtomicExpansionKind
NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { … }

// Pin NVPTXTargetObjectFile's vtables to this file: the destructor is defined
// here, out of line, as a defaulted function instead of in the class header.
// NOTE(review): this assumes the destructor is virtual (declared so in the
// header or inherited) -- the class declaration is not visible here.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;

MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
    const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { … }
llvm/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp