//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. // // This pass is not a substitute for the LLVM IR instcombine pass. This pass is // primarily intended to handle simplification opportunities that are implicit // in the LLVM IR and exposed by the various codegen lowering phases. // //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/ByteProvider.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> #include <cassert> #include <cstdint> #include <functional> #include <iterator> #include <optional> #include <string> #include <tuple> #include <utility> #include <variant> #include "MatchContext.h" using namespace llvm; using namespace llvm::SDPatternMatch; #define DEBUG_TYPE … STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); DEBUG_COUNTER(DAGCombineCounter, "dagcombine", "Controls whether a DAG
combine is performed for a node"); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); static cl::opt<bool> UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG static cl::opt<std::string> CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function")); #endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. static cl::opt<bool> StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false)); static cl::opt<bool> MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); static cl::opt<bool> EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store")); static cl::opt<unsigned> TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); static cl::opt<unsigned> StoreMergeDependenceLimit( "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check")); static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); static cl::opt<bool> EnableVectorFCopySignExtendRound( "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc( "Enable merging extends and rounds into FCOPYSIGN on vector types")); namespace { class DAGCombiner { … }; /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. 
class WorklistRemover : public SelectionDAG::DAGUpdateListener { … }; class WorklistInserter : public SelectionDAG::DAGUpdateListener { … }; } // end anonymous namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res, bool AddTo) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { … } bool TargetLowering::DAGCombinerInfo:: recursivelyDeleteUnusedNodes(SDNode *N) { … } void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { … } //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// void DAGCombiner::deleteAndRecombine(SDNode *N) { … } // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { … } // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict) const { … } /// Return true if this is a SetCC-equivalent operation with only one use. /// If this is true, it allows the users to invert the operation for free when /// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { … } static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { … } // Determines if it is a constant integer or a splat/build vector of constant // integers (and undefs). // Do not permit build vector implicit truncation. static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { … } // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with // undef's. static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { … } // Determine if this an indexed load with an opaque target constant index. static bool canSplitIdx(LoadSDNode *LD) { … } bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDNode *N, SDValue N0, SDValue N1) { … } /// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if /// \p N0 is the same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } /// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the /// same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y)) // Note that we only expect Flags to be passed from FP operations. 
For integer // operations they need to be dropped. SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { … } void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { … } /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, bool AssumeSingleUse) { … } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the elements. /// If so, return true. bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse) { … } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { … } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { … } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { … } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { … } /// Promote the specified integer binary operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { … } /// Promote the specified integer shift operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { … } SDValue DAGCombiner::PromoteExtend(SDValue Op) { … } bool DAGCombiner::PromoteLoad(SDValue Op) { … } /// Recursively delete a node which has no uses and any operands for /// which it is the only use. /// /// Note that this both deletes the nodes and removes them from the worklist. /// It also adds any nodes who have had a user deleted to the worklist as they /// may now have only one use and subject to other combines. bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { … } //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// void DAGCombiner::Run(CombineLevel AtLevel) { … } SDValue DAGCombiner::visit(SDNode *N) { … } SDValue DAGCombiner::combine(SDNode *N) { … } /// Given a node, return its input chain if it has one, otherwise return a null /// sd operand. static SDValue getInputChainForNode(SDNode *N) { … } SDValue DAGCombiner::visitTokenFactor(SDNode *N) { … } /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { … } /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a /// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { … } // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero/one in Known. // This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller. 
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known) { … } /// Return true if 'Use' is a load or a store that uses N as its base pointer /// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { … } /// This inverts a canonicalization in IR that replaces a variable select arm /// with an identity constant. Codegen improves if we re-use the variable /// operand rather than load a constant. This can also be converted into a /// masked vector operation if the target supports it. static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands) { … } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { … } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } // Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1) SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) { … } /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into /// a shift and add with a different constant. static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) { … } /// Try to fold a node that behaves like an ADD (note that N isn't necessarily /// an ISD::ADD here, it could for example be an ISD::OR if we know that there /// are no common bits set in the operands). SDValue DAGCombiner::visitADDLike(SDNode *N) { … } // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1) SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::visitADD(SDNode *N) { … } SDValue DAGCombiner::visitADDSAT(SDNode *N) { … } static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction = false) { … } /// Given the operands of an add/sub operation, see if the 2nd operand is a /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert /// the opcode and bypass the mask operation. static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL) { … } /// Helper for doing combines based on N0 and N1 being added to each other. SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference) { … } SDValue DAGCombiner::visitADDC(SDNode *N) { … } /** * Flips a boolean if it is cheaper to compute. If the Force parameters is set, * then the flip also occurs if computing the inverse is the same cost. * This function returns an empty SDValue in case it cannot flip the boolean * without increasing the cost of the computation. If you want to flip a boolean * no matter what, use DAG.getLogicalNOT. */ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force) { … } SDValue DAGCombiner::visitADDO(SDNode *N) { … } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitADDE(SDNode *N) { … } SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) { … } /** * If we are facing some sort of diamond carry propagation pattern try to * break it up to generate something like: * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry) * * The end result is usually an increase in operation required, but because the * carry is now linearized, other transforms can kick in and optimize the DAG. 
* * Patterns typically look something like * (uaddo A, B) * / \ * Carry Sum * | \ * | (uaddo_carry *, 0, Z) * | / * \ Carry * | / * (uaddo_carry X, *, *) * * But numerous variation exist. Our goal is to identify A, B, X and Z and * produce a combine with a single path for carry propagation. */ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N) { … } // If we are facing some sort of diamond carry/borrow in/out pattern try to // match patterns like: // // (uaddo A, B) CarryIn // | \ | // | \ | // PartialSum PartialCarryOutX / // | | / // | ____|____________/ // | / | // (uaddo *, *) \________ // | \ \ // | \ | // | PartialCarryOutY | // | \ | // | \ / // AddCarrySum | ______/ // | / // CarryOut = (or *, *) // // And generate UADDO_CARRY (or USUBO_CARRY) with two result values: // // {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn) // // Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY // with a single path for carry/borrow out propagation. static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { … } SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { … } SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { … } // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a // clamp/truncation if necessary. static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL) { … } // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to // usubsat(a,b), optionally as a truncated type. SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) { … } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::visitSUB(SDNode *N) { … } SDValue DAGCombiner::visitSUBSAT(SDNode *N) { … } SDValue DAGCombiner::visitSUBC(SDNode *N) { … } SDValue DAGCombiner::visitSUBO(SDNode *N) { … } SDValue DAGCombiner::visitSUBE(SDNode *N) { … } SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) { … } SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) { … } // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and // UMULFIXSAT here. SDValue DAGCombiner::visitMULFIX(SDNode *N) { … } template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) { … } /// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { … } /// Issue divrem if both quotient and remainder are needed. 
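/// For example, if the DAG contains both (sdiv X, Y) and (srem X, Y), they can
/// be replaced by the two results of a single ISD::SDIVREM node (or
/// ISD::UDIVREM for the unsigned case) when the target or a divrem libcall
/// supports it, so the division is computed only once.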
SDValue DAGCombiner::useDivRem(SDNode *Node) { … } static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitSDIV(SDNode *N) { … } static bool isDivisorPowerOfTwo(SDValue Divisor) { … } SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitUDIV(SDNode *N) { … } SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) { … } // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { … } SDValue DAGCombiner::visitMULHS(SDNode *N) { … } SDValue DAGCombiner::visitMULHU(SDNode *N) { … } SDValue DAGCombiner::visitAVG(SDNode *N) { … } SDValue DAGCombiner::visitABD(SDNode *N) { … } /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { … } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { … } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { … } SDValue DAGCombiner::visitMULO(SDNode *N) { … } // Function to calculate whether the Min/Max pair of SDNodes (potentially // swapped around) make a signed saturate pattern, clamping to between a signed // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW. // Returns the node being clamped and the bitwidth of the clamp in BW. Should // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the // same as SimplifySelectCC. N0<N1 ? N2 : N3. static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG) { … } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { … } static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { … } /// If this is a bitwise logic instruction and both operands have the same /// opcode, try to sink the other opcode after the logic instruction. SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { … } /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL) { … } static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG) { … } static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG) { … } // FIXME: use FMINIMUMNUM if possible, such as for RISC-V. static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) { … } static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { … } // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`. // We canonicalize to the `select` form in the middle end, but the `and` form // gets better codegen and all tested targets (arm, x86, riscv) static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG) { … } /// This contains all DAGCombine rules which reduce two values combined by /// an And operation to a single value. This makes them reusable in the context /// of visitSELECT(). 
Rules involving constants are not included as /// visitSELECT() already handles those cases. SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { … } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT) { … } bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, ISD::LoadExtType ExtType, EVT &MemVT, unsigned ShAmt) { … } bool DAGCombiner::SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, SmallPtrSetImpl<SDNode*> &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask) { … } bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { … } // Unfold // x & (-1 'logical shift' y) // To // (x 'opposite logical shift' y) 'logical shift' y // if it is better for performance. SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { … } /// Try to replace shift/logic that tests if a bit is clear with mask + setcc. /// For a target with a bit test, this is expected to become test + set and save /// at least 1 instruction. static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { … } /// For targets that support usubsat, match a bit-hack form of that operation /// that ends in 'and' and convert it. static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) { … } /// Given a bitwise logic operation N with a matching bitwise logic operand, /// fold a pattern where 2 of the source operands are identically shifted /// values. For example: /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG) { … } /// Given a tree of logic operations with shape like /// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) /// try to match and fold shift operations with the same shift amount. /// For example: /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) --> /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W) static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitAND(SDNode *N) { … } /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { … } /// Return true if the specified node is an element that makes up a 32-bit /// packed halfword byteswap. /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { … } // Match 2 elements of a packed halfword bswap. static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { … } // Match this pattern: // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) // And rewrite this to: // (rotr (bswap A), 16) static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT) { … } /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { … } /// This contains all DAGCombine rules which reduce two values combined by /// an Or operation to a single value \see visitANDLike(). SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) { … } /// OR combines for which the commuted variant will be tried as well. 
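/// For example, a fold written here for (or (and X, M), Y) is also attempted
/// for (or Y, (and X, M)), since the caller invokes this helper a second time
/// with the operands swapped.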
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitOR(SDNode *N) { … } static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask) { … } /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { … } /// Helper function for visitOR to extract the needed side of a rotate idiom /// from a shl/srl/mul/udiv. This is meant to handle cases where /// InstCombine merged some outside op with one of the shifts from /// the rotate pattern. /// \returns An empty \c SDValue if the needed shift couldn't be extracted. /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// /// (or (add v v) (shrl v bitwidth-1)): /// expands (add v v) -> (shl v 1) /// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// /// (or (udiv v c0) (shl (udiv v c1) c2)): /// expands (udiv v c0) -> (shrl (udiv v c1) c3) /// /// (or (shl v c0) (shrl (shl v c1) c2)): /// expands (shl v c0) -> (shl (shl v c1) c3) /// /// (or (shrl v c0) (shl (shrl v c1) c2)): /// expands (shrl v c0) -> (shrl (shrl v c1) c3) /// /// Such that in all cases, c3+c2==bitwidth(op v c1). static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL) { … } // Return true if we can prove that, whenever Neg and Pos are both in the // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. // // The IsRotate flag should be set when the LHS of both shifts is the same. // Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate) { … } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { … } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { … } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. 
This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { … } /// Recursively traverses the expression calculating the origin of the requested /// byte of the given value. Returns std::nullopt if the provider can't be /// calculated. /// /// For all the values except the root of the expression, we verify that the /// value has exactly one use and if not then return std::nullopt. This way if /// the origin of the byte is returned it's guaranteed that the values which /// contribute to the byte are not used outside of this expression. /// However, there is a special case when dealing with vector loads -- we allow /// more than one use if the load is a vector type. Since the values that /// contribute to the byte ultimately come from the ExtractVectorElements of the /// Load, we don't care if the Load has uses other than ExtractVectorElements, /// because those operations are independent from the pattern to be combined. /// For vector loads, we simply care that the ByteProviders are adjacent /// positions of the same vector, and their index matches the byte that is being /// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex /// is the index used in an ExtractVectorElement, and \p StartingIndex is the /// byte position we are trying to provide for the LoadCombine. If these do /// not match, then we can not combine the vector loads. \p Index uses the /// byte position we are trying to provide for and is matched against the /// shl and load size. The \p Index algorithm ensures the requested byte is /// provided for by the pattern, and the pattern does not over provide bytes. /// /// /// The supported LoadCombine pattern for vector loads is as follows /// or /// / \ /// or shl /// / \ | /// or shl zext /// / \ | | /// shl zext zext EVE* /// | | | | /// zext EVE* EVE* LOAD /// | | | /// EVE* LOAD LOAD /// | /// LOAD /// /// *ExtractVectorElement using SDByteProvider = ByteProvider<SDNode *>; static std::optional<SDByteProvider> calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional<uint64_t> VectorIndex, unsigned StartingIndex = 0) { … } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { … } static unsigned bigEndianByteAt(unsigned BW, unsigned i) { … } // Check if the byte offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and std::nullopt if match failed. static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { … } // Look through one layer of truncate or extend. static SDValue stripTruncAndExt(SDValue Value) { … } /// Match a pattern where a wide type scalar value is stored by several narrow /// stores. Fold it into a single store or a BSWAP and a store if the target /// supports it. /// /// Assuming little endian target: /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 0) & 0xFF; /// p[1] = (val >> 8) & 0xFF; /// p[2] = (val >> 16) & 0xFF; /// p[3] = (val >> 24) & 0xFF; /// => /// *((i32)p) = val; /// /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 24) & 0xFF; /// p[1] = (val >> 16) & 0xFF; /// p[2] = (val >> 8) & 0xFF; /// p[3] = (val >> 0) & 0xFF; /// => /// *((i32)p) = BSWAP(val); SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { … } /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors.
Fold it into a single load or a load /// and a BSWAP if the targets supports it. /// /// Assuming little endian target: /// i8 *a = ... /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) /// => /// i32 val = *((i32)a) /// /// i8 *a = ... /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] /// => /// i32 val = BSWAP(*((i32)a)) /// /// TODO: This rule matches complex patterns with OR node roots and doesn't /// interact well with the worklist mechanism. When a part of the pattern is /// updated (e.g. one of the loads) its direct users are put into the worklist, /// but the root node of the pattern which triggers the load combine is not /// necessarily a direct user of the changed node. For example, once the address /// of t28 load is reassociated load combine won't be triggered: /// t25: i32 = add t4, Constant:i32<2> /// t26: i64 = sign_extend t25 /// t27: i64 = add t2, t26 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64 /// t29: i32 = zero_extend t28 /// t32: i32 = shl t29, Constant:i8<8> /// t33: i32 = or t23, t32 /// As a possible fix visitLoad can check if the load can be a part of a load /// combine pattern and add corresponding OR roots to the worklist. SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { … } // If the target has andn, bsl, or a similar bit-select instruction, // we want to unfold masked merge, with canonical pattern of: // | A | |B| // ((x ^ y) & m) ^ y // | D | // Into: // (x & m) | (y & ~m) // If y is a constant, m is not a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) // If x is a constant, m is a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { … } SDValue DAGCombiner::visitXOR(SDNode *N) { … } /// If we have a shift-by-constant of a bitwise logic op that itself has a /// shift-by-constant operand with identical opcode, we may be able to convert /// that into 2 independent shifts followed by the logic op. This is a /// throughput improvement. static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { … } /// Handle transforms common to the three shifts, when the shift amount is a /// constant. /// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { … } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { … } SDValue DAGCombiner::visitRotate(SDNode *N) { … } SDValue DAGCombiner::visitSHL(SDNode *N) { … } // Transform a right shift of a multiply into a multiply-high. 
// Examples: // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b) // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b) static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { … } // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y)) // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitSRA(SDNode *N) { … } SDValue DAGCombiner::visitSRL(SDNode *N) { … } SDValue DAGCombiner::visitFunnelShift(SDNode *N) { … } SDValue DAGCombiner::visitSHLSAT(SDNode *N) { … } // Given an ABS node, detect the following patterns: // (ABS (SUB (EXTEND a), (EXTEND b))). // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))). // Generates UABD/SABD instruction. SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::visitABS(SDNode *N) { … } SDValue DAGCombiner::visitBSWAP(SDNode *N) { … } SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { … } SDValue DAGCombiner::visitCTLZ(SDNode *N) { … } SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { … } SDValue DAGCombiner::visitCTTZ(SDNode *N) { … } SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { … } SDValue DAGCombiner::visitCTPOP(SDNode *N) { … } static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { … } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { … } /// Generate Min/Max node SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC) { … } /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI) { … } SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { … } template <class MatchContextClass> static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { … } // Match SELECTs with absolute difference patterns. // (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b) // (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b) // (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b) // (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b) SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const SDLoc &DL) { … } SDValue DAGCombiner::visitSELECT(SDNode *N) { … } // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { … } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL) { … } // Fold sext/zext of index into index type.
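// For example, a gather/scatter whose index vector is a sext/zext of a
// narrower index may be able to use the narrower index directly, expressing
// the signedness through ISD::MemIndexType instead of an explicit extend node,
// when the target supports that index type.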
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitVPSCATTER(SDNode *N) { … } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { … } SDValue DAGCombiner::visitMSTORE(SDNode *N) { … } SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) { … } SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) { … } SDValue DAGCombiner::visitVPGATHER(SDNode *N) { … } SDValue DAGCombiner::visitMGATHER(SDNode *N) { … } SDValue DAGCombiner::visitMLOAD(SDNode *N) { … } SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) { … } /// A vector select of 2 constant vectors can be simplified to math/logic to /// avoid a variable select instruction and possibly avoid constant loads. SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { … } SDValue DAGCombiner::visitVP_SELECT(SDNode *N) { … } SDValue DAGCombiner::visitVSELECT(SDNode *N) { … } SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { … } SDValue DAGCombiner::visitSETCC(SDNode *N) { … } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { … } /// Check if N satisfies: /// N is used once. /// N is a Load. /// The load is compatible with ExtOpcode: if the load has an explicit /// zero/sign extension, ExtOpcode must have the same extension; /// otherwise (a plain load) it is considered compatible. static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { … } /// Fold /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y) /// (aext (select c, load x, load y)) -> (select c, extload x, extload y) /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level) { … } /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes) { … } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if the extension is possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { … } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue OrigLoad, SDValue ExtLoad, ISD::NodeType ExtType) { … } // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) { … } // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { … } /// If we're narrowing or widening the result of a vector select and the final /// size is the same size as a setcc (compare) feeding the select, then try to /// apply the cast operation to the select's operands because matching vector /// sizes for a select condition and other operands should be more efficient. SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { … } // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType) { … } // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) // Only generate vector extloads when 1) they're legal, and 2) they are // deemed desirable by the target. NonNegZExt can be set to true if a zero // extend has the nonneg flag to allow use of sextload if profitable. static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt = false) { … } static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { … } // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load))) static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType) { … } static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::foldSextSetcc(SDNode *N) { … } SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { … } /// Given an extending node with a pop-count operand, if the target does not /// support a pop-count in the narrow source type but does support it in the /// destination type, widen the pop-count to the destination type. static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) { … } // If we have (zext (abs X)) where X is a type that will be promoted by type // legalization, convert to (abs (sext X)). But don't extend past a legal type. static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitAssertExt(SDNode *N) { … } SDValue DAGCombiner::visitAssertAlign(SDNode *N) { … } /// If the result of a load is shifted/masked/truncated to an effectively /// narrower type, try to transform the load to a narrower type and/or /// use an extending load. 
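/// For example, assuming the narrower access is legal and desirable on the
/// target (little-endian shown):
///   (i32 (trunc (srl (i64 (load p)), 32)))  ->  (i32 (load p+4))
///   (and (i32 (load p)), 255)               ->  (i32 (zextload p, i8))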
SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { … } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { … } static SDValue foldExtendVectorInregToExtendOfSubvector( SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { … } SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) { … } /// Detect patterns of truncation with unsigned saturation: /// /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// Return the source value x to be truncated or SDValue() if the pattern was /// not matched. /// static SDValue detectUSatUPattern(SDValue In, EVT VT) { … } /// Detect patterns of truncation with signed saturation: /// (truncate (smin (smax (x, signed_min_of_dest_type), /// signed_max_of_dest_type)) to dest_type) /// or: /// (truncate (smax (smin (x, signed_max_of_dest_type), /// signed_min_of_dest_type)) to dest_type). /// /// Return the source value to be truncated or SDValue() if the pattern was not /// matched. static SDValue detectSSatSPattern(SDValue In, EVT VT) { … } /// Detect patterns of truncation with unsigned saturation: static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL) { … } static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { … } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { … } /// build_pair (load, load) -> load /// if load locations are consecutive. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { … } static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { … } SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { … } SDValue DAGCombiner::visitBITCAST(SDNode *N) { … } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { … } SDValue DAGCombiner::visitFREEZE(SDNode *N) { … } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { … } // Returns true if floating point contraction is allowed on the FMUL-SDValue // `N` static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { … } // Returns true if `N` can assume no infinities involved in its computation. static bool hasNoInfs(const TargetOptions &Options, SDValue N) { … } /// Try to perform FMA combining on a given FADD node. template <class MatchContextClass> SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { … } /// Try to perform FMA combining on a given FSUB node. template <class MatchContextClass> SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { … } /// Try to perform FMA combining on a given FMUL node based on the distributive /// law x * (y + 1) = x * y + x and variants thereof (commuted versions, /// subtraction instead of addition). 
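/// For example (subject to the usual FMA legality and profitability checks):
///   (fmul x, (fadd y, 1.0)) -> (fma x, y, x)
///   (fmul x, (fsub y, 1.0)) -> (fma x, y, (fneg x))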
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { … } SDValue DAGCombiner::visitVP_FADD(SDNode *N) { … } SDValue DAGCombiner::visitFADD(SDNode *N) { … } SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) { … } SDValue DAGCombiner::visitFSUB(SDNode *N) { … } // Transform IEEE Floats: // (fmul C, (uitofp Pow2)) // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) // (fdiv C, (uitofp Pow2)) // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) // // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so // there is no need for more than an add/sub. // // This is valid under the following circumstances: // 1) We are dealing with IEEE floats // 2) C is normal // 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds. // TODO: Much of this could also be used for generating `ldexp` on targets that // prefer it. SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) { … } SDValue DAGCombiner::visitFMUL(SDNode *N) { … } template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { … } SDValue DAGCombiner::visitFMAD(SDNode *N) { … } // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) // Notice that this is not always beneficial. One reason is that different // targets may have different costs for FDIV and FMUL, so sometimes the cost of // two FDIVs may be lower than the cost of one FDIV and two FMULs. Another // reason is that the critical path is increased from "one FDIV" to "one FDIV + // one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { … } SDValue DAGCombiner::visitFDIV(SDNode *N) { … } SDValue DAGCombiner::visitFREM(SDNode *N) { … } SDValue DAGCombiner::visitFSQRT(SDNode *N) { … } /// copysign(x, fp_extend(y)) -> copysign(x, y) /// copysign(x, fp_round(y)) -> copysign(x, y) /// Operands to the functions are the type of X and Y respectively. static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) { … } static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { … } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { … } SDValue DAGCombiner::visitFPOW(SDNode *N) { … } static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { … } SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { … } // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { … } SDValue DAGCombiner::visitXROUND(SDNode *N) { … } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { … } SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitFCEIL(SDNode *N) { … } SDValue DAGCombiner::visitFTRUNC(SDNode *N) { … } SDValue DAGCombiner::visitFFREXP(SDNode *N) { … } SDValue DAGCombiner::visitFFLOOR(SDNode *N) { … } SDValue DAGCombiner::visitFNEG(SDNode *N) { … } SDValue DAGCombiner::visitFMinMax(SDNode *N) { … } SDValue DAGCombiner::visitFABS(SDNode *N) { … } SDValue DAGCombiner::visitBRCOND(SDNode *N) { … } SDValue DAGCombiner::rebuildSetCC(SDValue N) { … } // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
SDValue DAGCombiner::visitBR_CC(SDNode *N) { … } static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI) { … } /// Try turning a load/store into a pre-indexed load/store when the base /// pointer is an add or subtract and it has other uses besides the load/store. /// After the transformation, the new indexed load/store has effectively folded /// the add/subtract in and all of its other uses are redirected to the /// new load/store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { … } static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI) { … } static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI) { … } /// Try to combine a load/store with an add/sub of the base pointer node into a /// post-indexed load/store. The transformation effectively folds the /// add/subtract into the new indexed load/store, and all of its uses are /// redirected to the new load/store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { … } /// Return the base-pointer arithmetic from an indexed \p LD. SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { … } static inline ElementCount numVectorEltsOrZero(EVT T) { … } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { … } bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { … } StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset) { … } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { … } SDValue DAGCombiner::visitLOAD(SDNode *N) { … } namespace { /// Helper structure used to slice a load into smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// /// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { … }; } // end anonymous namespace /// Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { … } /// Check whether or not \p First and \p Second are next to each other /// in memory. This means that there is no hole between the bits loaded /// by \p First and the bits loaded by \p Second. static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second) { … } /// Adjust the \p GlobalLSCost according to the target /// pairing capabilities and the layout of the slices. /// \pre \p GlobalLSCost should account for at least as many loads as /// there are in the slices in \p LoadedSlices. static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, LoadedSlice::Cost &GlobalLSCost) { … } /// Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there are exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
/// /// Note: The order of the elements in \p LoadedSlices may be modified, but not /// the elements themselves. /// /// FIXME: When the cost model will be mature enough, we can relax /// constraints (1) and (2). static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, const APInt &UsedBits, bool ForCodeSize) { … } /// If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. /// This slicing takes care not to insert overlapping loads. /// \pre LI is a simple load (i.e., not an atomic or volatile load). bool DAGCombiner::SliceUpLoad(SDNode *N) { … } /// Check to see if V is (and load (ptr), imm), where the load is having /// specific bytes cleared out. If so, return the byte size being masked out /// and the shift amount. static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { … } /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { … } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try /// narrowing the load and store if it would end up being a win for performance /// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { … } /// For a given floating point load / store pair, if the load value isn't used /// by any other operations, then consider transforming the pair to integer /// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { … } // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), // and ConstNode is c2. // // If the (add x, c1) has multiple uses, we could increase // the number of adds if we make this transformation. // It would only be worth doing this if we can remove a // multiply in the process. Check for that here. // To illustrate: // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, SDValue ConstNode) { … } SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { … } bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) { … } bool DAGCombiner::mergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc) { … } void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&RootNode) { … } // We need to check that merging these stores does not cause a loop in the // DAG. Any store candidate may depend on another candidate indirectly through // its operands. Check in parallel by searching up from operands of candidates. 
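// For example, if the value stored by one candidate is (directly or through a
// chain of operands) computed from a load that depends on the chain of another
// candidate store, merging the two would make the combined store an operand of
// itself, i.e. introduce a cycle.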
bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode) { … } unsigned DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, int64_t ElementSizeBytes) const { … } bool DAGCombiner::tryStoreMergeOfConstants( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors) { … } bool DAGCombiner::tryStoreMergeOfExtracts( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode) { … } bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors, bool IsNonTemporalStore, bool IsNonTemporalLoad) { … } bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { … } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { … } SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { … } // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset) // // If a store of a load with an element inserted into it has no other // uses in between (on the chain), then we can consider the vector store // dead and replace it with just the single scalar element store. SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { … } SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) { … } SDValue DAGCombiner::visitSTORE(SDNode *N) { … } SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { … } /// For the store instruction sequence below, the F and I values /// are bundled together as an i64 value before being stored into memory. /// Sometimes it is more efficient to generate separate stores for F and I, /// which can remove the bitwise instructions or sink them to colder places. /// /// (store (or (zext (bitcast F to i32) to i64), /// (shl (zext I to i64), 32)), addr) --> /// (store F, addr) and (store I, addr+4) /// /// Similarly, splitting other merged stores can also be beneficial, for example: /// For pair of {i32, i32}, i64 store --> two i32 stores. /// For pair of {i32, i16}, i64 store --> two i32 stores. /// For pair of {i16, i16}, i32 store --> two i16 stores. /// For pair of {i16, i8}, i32 store --> two i16 stores. /// For pair of {i8, i8}, i16 store --> two i8 stores. /// /// We allow each target to determine specifically which kind of splitting is /// supported. /// /// The store patterns are commonly seen in the simple code snippet below /// when only std::make_pair(...) is SROA-transformed before being inlined into hoo. /// void goo(const std::pair<int, float> &); /// void hoo() { /// ... /// goo(std::make_pair(tmp, ftmp)); /// ... /// } /// SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { … } // Merge an insertion into an existing shuffle: // (insert_vector_elt (vector_shuffle X, Y, Mask), // (extract_vector_elt X, N), InsIndex) // --> (vector_shuffle X, Y, NewMask) // and variations where shuffle operands may be CONCAT_VECTORS. static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask, SmallVectorImpl<int> &NewMask, SDValue Elt, unsigned InsIndex) { … } // Merge an insertion into an existing shuffle: // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), // InsIndex) // --> (vector_shuffle X, Y) and variations where shuffle operands may be // CONCAT_VECTORS.
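// As an illustrative example with 4-element vectors:
//   (insert_vector_elt (vector_shuffle X, Y, <0,4,2,6>),
//                      (extract_vector_elt X, 1), 2)
//   --> (vector_shuffle X, Y, <0,4,1,6>)
// since writing element 1 of X into lane 2 is the same as selecting mask
// element 1 (an X element) for that lane.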
SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) { … } // Convert a disguised subvector insertion into a shuffle: // insert_vector_elt V, (bitcast X from vector type), IdxC --> // bitcast(shuffle (bitcast V), (extended X), Mask) // Note: We do not use an insert_subvector node because that requires a // legal subvector type. SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { … } // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if // possible and the new load will be fast. We use more loads but fewer shuffles // and inserts. SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) { … } SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { … } SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { … } /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations) { … } // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, // recursively analyse all of its users and try to model them as // bit sequence extractions. If all of them agree on the new, narrower element // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that // new element type, do so now. // This is mainly useful to recover from legalization that scalarized // the vector as wide elements, but tries to rebuild it with narrower elements. // // Some more nodes could be modelled if that helps cover interesting patterns. bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts( SDNode *N) { … } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { … } // Simplify (build_vec (ext )) to (bitcast (build_vec )) SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { … } // Simplify (build_vec (trunc $1) // (trunc (srl $1 half-width)) // (trunc (srl $1 (2 * half-width)))) // to (bitcast $1) SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { … } SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec) { … } static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { … } // FIXME: promote to STLExtras. template <typename R, typename T> static auto getFirstIndexOf(R &&Range, const T &Val) { … } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { … } // Try to turn a build vector of zero extends of extract vector elts into a // vector zero extend and possibly an extract subvector. // TODO: Support sign extend? // TODO: Allow undef elements? SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { … } // If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND, // and all other elements being constant zeros, granularize the BUILD_VECTOR's // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op. // This pattern can appear during legalization.
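//
// As an illustrative example (little-endian lane order assumed):
//   (v2i64 build_vector (i64 zero_extend (i32 %x)), (i64 0))
//   --> (v2i64 bitcast (v4i32 build_vector %x, 0, 0, 0))
// The ZERO_EXTEND is absorbed because the high half of the first i64 lane is
// already known to be zero.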
// // NOTE: This can be generalized to allow more than a single // non-constant-zero op, UNDEFs, and to be KnownBits-based. SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) { … } SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { … } static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { … } // Attempt to merge nested concat_vectors/undefs. // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG) { … } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this // into a legal shuffle. static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { … } static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { … } // See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of // the operands is a SHUFFLE_VECTOR, and all other operands are also operands // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR. static SDValue combineConcatVectorOfShuffleAndItsOperands( SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations) { … } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { … } // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { … } static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { … } /// If we are extracting a subvector produced by a wide binary operator, try /// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { … } /// If we are extracting a subvector from a wide vector load, convert to a /// narrow load to eliminate the extraction: /// (extract_subvector (load wide vector)) --> (load narrow vector) static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { … } /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?), /// EXTRACT_SUBVECTOR(Op?, ?), /// Mask') /// iff it is legal and profitable to do so. Notably, the trimmed mask /// (containing only the elements that are extracted) /// must reference at most two subvectors. static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { … } /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles /// followed by concatenation. Narrow vector ops may have better performance /// than wide ops, and this can unlock further narrowing of other vector ops. /// Targets can invert this transform later if it is not profitable. static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
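// As an illustrative example, with A, B, C, D all v2i32:
//   shuffle (concat_vectors A, B), (concat_vectors C, D), <2,3,4,5>
//   --> concat_vectors B, C
// since mask elements <2,3> select exactly B and <4,5> select exactly C.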
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { … } // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. // // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always // a simplification in some sense, but it isn't appropriate in general: some // BUILD_VECTORs are substantially cheaper than others. The general case // of a BUILD_VECTOR requires inserting each element individually (or // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of // all constants is a single constant pool load. A BUILD_VECTOR where each // element is identical is a splat. A BUILD_VECTOR where most of the operands // are undef lowers to a small number of element insertions. // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. // We don't fold shuffles where one side is a non-zero constant, and we don't // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI) { … } // Match shuffles that can be converted to *_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)), // and returns the EVT to which the extension should be performed. // NOTE: this assumes that the src is the first operand of the shuffle. static std::optional<EVT> canCombineShuffleToExtendVectorInreg( unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations) { … } // Match shuffles that can be converted to any_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } // Match shuffles that can be converted to zero_extend_vector_inreg. // This is often generated during legalization. // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src)) static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of // each source element of a large type into the lowest elements of a smaller // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { … } // Combine shuffles of splat-shuffles of the form: // shuffle (shuffle V, undef, splat-mask), undef, M // If splat-mask contains undef elements, we need to be careful about // introducing undef's in the folded mask which are not the result of composing // the masks of the shuffles. static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing // the mask can be treated as a larger type. 
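// As an illustrative example:
//   shuffle (v8i16 bitcast (v4i32 X)), (v8i16 bitcast (v4i32 Y)),
//           <0,1,8,9,2,3,10,11>
//   --> (v8i16 bitcast (shuffle X, Y, <0,4,1,5>))
// because each consecutive pair of i16 mask elements moves one whole i32 lane.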
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } /// Combine a shuffle of a shuffle of the form: /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG) { … } /// If the shuffle mask is taking exactly one element from the first vector /// operand and passing through all other elements from the second vector /// operand, return the index of the mask element that is choosing an element /// from the first operand. Otherwise, return -1. static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { … } /// If a shuffle inserts exactly one element from a source vector operand into /// another vector operand and we can access the specified element as a scalar, /// then we can eliminate the shuffle. static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } /// If we have a unary shuffle of a shuffle, see if it can be folded away /// completely. This has the potential to lose undef knowledge because the first /// shuffle may not have an undef mask element where the second one does. So /// only call this after doing simplifications based on demanded elements. static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) { … } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { … } SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { … } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { … } SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { … } SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { … } SDValue DAGCombiner::visitVP_FSUB(SDNode *N) { … } SDValue DAGCombiner::visitVPOp(SDNode *N) { … } SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) { … } SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) { … } /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { … } /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes) { … } /// Visit a vector cast operation, like FP_EXTEND. SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { … } /// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2) { … } /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values /// being selected between, see if we can simplify the select. Callers of this /// should assume that TheSelect is deleted if this returns true. As such, they /// should return the appropriate thing (e.g. the node) back to the top-level of /// the DAG combiner loop to avoid it being looked at. bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { … } /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and /// bitwise 'and'.
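/// As an illustrative example, for an i32 value X:
///   (X < 0) ? 8 : 0  -->  (and (srl X, 28), 8)
/// The sign bit is shifted down to bit position 3 (since 8 == 1 << 3) and the
/// 'and' clears every other bit.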
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { … } // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc. SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { … } // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values. SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) { … } /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))" /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 /// in it. This may be a win when the constant is not otherwise available /// because it replaces two constant pool loads with one. SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { … } /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { … } /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { … } /// Given an ISD::SDIV node expressing a divide by constant, return /// a DAG expression to select that will generate the same value by multiplying /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { … } /// Given an ISD::SDIV node expressing a divide by a constant power of 2, return a /// DAG expression that will generate the same value by right shifting. SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { … } /// Given an ISD::UDIV node expressing a divide by constant, return a DAG /// expression that will generate the same value by multiplying by a magic /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { … } /// Given an ISD::SREM node expressing a remainder by a constant power of 2, /// return a DAG expression that will generate the same value. SDValue DAGCombiner::BuildSREMPow2(SDNode *N) { … } // This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp // // Returns the node that represents `Log2(Op)`. This may create a new node. If // we are unable to compute `Log2(Op)`, this returns `SDValue()`. // // All nodes will be created at `DL` and the output will be of type `VT`. // // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set // `AssumeNonZero` if this function should simply assume (rather than require proving) // that `Op` is non-zero. static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero) { … } /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
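/// As an illustrative example, for i32 V = 16: ctlz(16) = 27, so
/// LogBase2(16) = 31 - 27 = 4, matching 16 == 1 << 4.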
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL, bool KnownNonZero, bool InexpensiveOnly, std::optional<EVT> OutVT) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: /// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] /// For the last iteration, put numerator N into it to gain more precision: /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { … } /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case, /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { … } SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { … } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { … } /// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { … } /// Walk up the chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases) { … } /// Walk up the chain skipping non-aliasing memory nodes, looking for a better chain /// (an aliasing node). SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { … } // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store // node, so why not let the work be done on each store as it's visited? // // I believe this is mainly important because mergeConsecutiveStores // is unable to deal with merging stores of different sizes, so unless // we improve the chains of all the potential candidates up-front // before running mergeConsecutiveStores, it might only see some of // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { … } bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { … } /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOptLevel OptLevel) { … }