//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. // // This pass is not a substitute for the LLVM IR instcombine pass. This pass is // primarily intended to handle simplification opportunities that are implicit // in the LLVM IR and exposed by the various codegen lowering phases. // //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/ByteProvider.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> #include <cassert> #include <cstdint> #include <functional> #include <iterator> #include <optional> #include <string> #include <tuple> #include <utility> #include <variant> #include "MatchContext.h" using namespace llvm; using namespace llvm::SDPatternMatch; #define DEBUG_TYPE … STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); DEBUG_COUNTER(DAGCombineCounter, "dagcombine", "Controls whether a DAG
combine is performed for a node"); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); static cl::opt<bool> UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG static cl::opt<std::string> CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function")); #endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. static cl::opt<bool> StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false)); static cl::opt<bool> MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); static cl::opt<bool> EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store")); static cl::opt<unsigned> TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); static cl::opt<unsigned> StoreMergeDependenceLimit( "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check")); static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); static cl::opt<bool> EnableVectorFCopySignExtendRound( "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc( "Enable merging extends and rounds into FCOPYSIGN on vector types")); namespace { class DAGCombiner { … }; /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. 
class WorklistRemover : public SelectionDAG::DAGUpdateListener { … }; class WorklistInserter : public SelectionDAG::DAGUpdateListener { … }; } // end anonymous namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res, bool AddTo) { … } SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { … } bool TargetLowering::DAGCombinerInfo:: recursivelyDeleteUnusedNodes(SDNode *N) { … } void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { … } //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// void DAGCombiner::deleteAndRecombine(SDNode *N) { … } // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { … } // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC, bool MatchStrict) const { … } /// Return true if this is a SetCC-equivalent operation with only one use. /// If this is true, it allows the users to invert the operation for free when /// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { … } static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { … } // Determines if it is a constant integer or a splat/build vector of constant // integers (and undefs). // Do not permit build vector implicit truncation. static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { … } // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with // undef's. static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { … } // Determine if this an indexed load with an opaque target constant index. static bool canSplitIdx(LoadSDNode *LD) { … } bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDNode *N, SDValue N0, SDValue N1) { … } /// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if /// \p N0 is the same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } /// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the /// same kind of operation as \p Opc. SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y)) // Note that we only expect Flags to be passed from FP operations. 
For integer // operations they need to be dropped. SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SDNodeFlags Flags) { … } SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { … } void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { … } /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, bool AssumeSingleUse) { … } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the elements. /// If so, return true. bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse) { … } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { … } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { … } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { … } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { … } /// Promote the specified integer binary operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { … } /// Promote the specified integer shift operation if the target indicates it is /// beneficial. e.g. On x86, it's usually better to promote i16 operations to /// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { … } SDValue DAGCombiner::PromoteExtend(SDValue Op) { … } bool DAGCombiner::PromoteLoad(SDValue Op) { … } /// Recursively delete a node which has no uses and any operands for /// which it is the only use. /// /// Note that this both deletes the nodes and removes them from the worklist. /// It also adds any nodes who have had a user deleted to the worklist as they /// may now have only one use and subject to other combines. bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { … } //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// void DAGCombiner::Run(CombineLevel AtLevel) { … } SDValue DAGCombiner::visit(SDNode *N) { … } SDValue DAGCombiner::combine(SDNode *N) { … } /// Given a node, return its input chain if it has one, otherwise return a null /// sd operand. static SDValue getInputChainForNode(SDNode *N) { … } SDValue DAGCombiner::visitTokenFactor(SDNode *N) { … } /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { … } /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a /// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { … } // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero/one in Known. // This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller. 
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known) { … } /// Return true if 'Use' is a load or a store that uses N as its base pointer /// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { … } /// This inverts a canonicalization in IR that replaces a variable select arm /// with an identity constant. Codegen improves if we re-use the variable /// operand rather than load a constant. This can also be converted into a /// masked vector operation if the target supports it. static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands) { … } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { … } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } // Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1) SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) { … } /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into /// a shift and add with a different constant. static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) { … } /// Try to fold a node that behaves like an ADD (note that N isn't necessarily /// an ISD::ADD here, it could for example be an ISD::OR if we know that there /// are no common bits set in the operands). SDValue DAGCombiner::visitADDLike(SDNode *N) { … } // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1) SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::visitADD(SDNode *N) { … } SDValue DAGCombiner::visitADDSAT(SDNode *N) { … } static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction = false) { … } /// Given the operands of an add/sub operation, see if the 2nd operand is a /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert /// the opcode and bypass the mask operation. static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL) { … } /// Helper for doing combines based on N0 and N1 being added to each other. SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference) { … } SDValue DAGCombiner::visitADDC(SDNode *N) { … } /** * Flips a boolean if it is cheaper to compute. If the Force parameters is set, * then the flip also occurs if computing the inverse is the same cost. * This function returns an empty SDValue in case it cannot flip the boolean * without increasing the cost of the computation. If you want to flip a boolean * no matter what, use DAG.getLogicalNOT. */ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force) { … } SDValue DAGCombiner::visitADDO(SDNode *N) { … } SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitADDE(SDNode *N) { … } SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) { … } /** * If we are facing some sort of diamond carry propagation pattern try to * break it up to generate something like: * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry) * * The end result is usually an increase in operation required, but because the * carry is now linearized, other transforms can kick in and optimize the DAG. 
* * Patterns typically look something like * (uaddo A, B) * / \ * Carry Sum * | \ * | (uaddo_carry *, 0, Z) * | / * \ Carry * | / * (uaddo_carry X, *, *) * * But numerous variation exist. Our goal is to identify A, B, X and Z and * produce a combine with a single path for carry propagation. */ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N) { … } // If we are facing some sort of diamond carry/borrow in/out pattern try to // match patterns like: // // (uaddo A, B) CarryIn // | \ | // | \ | // PartialSum PartialCarryOutX / // | | / // | ____|____________/ // | / | // (uaddo *, *) \________ // | \ \ // | \ | // | PartialCarryOutY | // | \ | // | \ / // AddCarrySum | ______/ // | / // CarryOut = (or *, *) // // And generate UADDO_CARRY (or USUBO_CARRY) with two result values: // // {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn) // // Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY // with a single path for carry/borrow out propagation. static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { … } SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { … } SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { … } // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a // clamp/truncation if necessary. static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL) { … } // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to // usubsat(a,b), optionally as a truncated type. SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) { … } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::visitSUB(SDNode *N) { … } SDValue DAGCombiner::visitSUBSAT(SDNode *N) { … } SDValue DAGCombiner::visitSUBC(SDNode *N) { … } SDValue DAGCombiner::visitSUBO(SDNode *N) { … } SDValue DAGCombiner::visitSUBE(SDNode *N) { … } SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) { … } SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) { … } // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and // UMULFIXSAT here. SDValue DAGCombiner::visitMULFIX(SDNode *N) { … } template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) { … } /// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { … } /// Issue divrem if both quotient and remainder are needed. 
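/// For example, if the DAG contains both (sdiv X, Y) and (srem X, Y), they can
/// be replaced by the two results of a single ISD::SDIVREM node (or
/// ISD::UDIVREM for the unsigned case) when the target or a divrem libcall
/// supports it, so the division is computed only once.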
SDValue DAGCombiner::useDivRem(SDNode *Node) { … } static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitSDIV(SDNode *N) { … } static bool isDivisorPowerOfTwo(SDValue Divisor) { … } SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitUDIV(SDNode *N) { … } SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) { … } // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { … } SDValue DAGCombiner::visitMULHS(SDNode *N) { … } SDValue DAGCombiner::visitMULHU(SDNode *N) { … } SDValue DAGCombiner::visitAVG(SDNode *N) { … } SDValue DAGCombiner::visitABD(SDNode *N) { … } /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { … } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { … } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { … } SDValue DAGCombiner::visitMULO(SDNode *N) { … } // Function to calculate whether the Min/Max pair of SDNodes (potentially // swapped around) make a signed saturate pattern, clamping to between a signed // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW. // Returns the node being clamped and the bitwidth of the clamp in BW. Should // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the // same as SimplifySelectCC. N0<N1 ? N2 : N3. static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG) { … } static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { … } static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitIMINMAX(SDNode *N) { … } /// If this is a bitwise logic instruction and both operands have the same /// opcode, try to sink the other opcode after the logic instruction. SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { … } /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL) { … } static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG) { … } static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG) { … } // FIXME: use FMINIMUMNUM if possible, such as for RISC-V. static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) { … } static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { … } // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`. // We canonicalize to the `select` form in the middle end, but the `and` form // gets better codegen and all tested targets (arm, x86, riscv) static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG) { … } /// This contains all DAGCombine rules which reduce two values combined by /// an And operation to a single value. This makes them reusable in the context /// of visitSELECT(). 
Rules involving constants are not included as /// visitSELECT() already handles those cases. SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { … } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, EVT LoadResultTy, EVT &ExtVT) { … } bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, ISD::LoadExtType ExtType, EVT &MemVT, unsigned ShAmt) { … } bool DAGCombiner::SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, SmallPtrSetImpl<SDNode*> &NodesWithConsts, ConstantSDNode *Mask, SDNode *&NodeToMask) { … } bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { … } // Unfold // x & (-1 'logical shift' y) // To // (x 'opposite logical shift' y) 'logical shift' y // if it is better for performance. SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { … } /// Try to replace shift/logic that tests if a bit is clear with mask + setcc. /// For a target with a bit test, this is expected to become test + set and save /// at least 1 instruction. static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { … } /// For targets that support usubsat, match a bit-hack form of that operation /// that ends in 'and' and convert it. static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) { … } /// Given a bitwise logic operation N with a matching bitwise logic operand, /// fold a pattern where 2 of the source operands are identically shifted /// values. For example: /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG) { … } /// Given a tree of logic operations with shape like /// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) /// try to match and fold shift operations with the same shift amount. /// For example: /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) --> /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W) static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitAND(SDNode *N) { … } /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { … } /// Return true if the specified node is an element that makes up a 32-bit /// packed halfword byteswap. /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { … } // Match 2 elements of a packed halfword bswap. static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { … } // Match this pattern: // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) // And rewrite this to: // (rotr (bswap A), 16) static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT) { … } /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { … } /// This contains all DAGCombine rules which reduce two values combined by /// an Or operation to a single value \see visitANDLike(). SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) { … } /// OR combines for which the commuted variant will be tried as well. 
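/// For example, a fold written here for (or (and X, M), Y) is also attempted
/// for (or Y, (and X, M)), since the caller invokes this helper a second time
/// with the operands swapped.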
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { … } SDValue DAGCombiner::visitOR(SDNode *N) { … } static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask) { … } /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { … } /// Helper function for visitOR to extract the needed side of a rotate idiom /// from a shl/srl/mul/udiv. This is meant to handle cases where /// InstCombine merged some outside op with one of the shifts from /// the rotate pattern. /// \returns An empty \c SDValue if the needed shift couldn't be extracted. /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// /// (or (add v v) (shrl v bitwidth-1)): /// expands (add v v) -> (shl v 1) /// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// /// (or (udiv v c0) (shl (udiv v c1) c2)): /// expands (udiv v c0) -> (shrl (udiv v c1) c3) /// /// (or (shl v c0) (shrl (shl v c1) c2)): /// expands (shl v c0) -> (shl (shl v c1) c3) /// /// (or (shrl v c0) (shl (shrl v c1) c2)): /// expands (shrl v c0) -> (shrl (shrl v c1) c3) /// /// Such that in all cases, c3+c2==bitwidth(op v c1). static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL) { … } // Return true if we can prove that, whenever Neg and Pos are both in the // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. // // The IsRotate flag should be set when the LHS of both shifts is the same. // Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate) { … } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { … } // A subroutine of MatchRotate used once we have found an OR of two opposite // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { … } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. 
This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { … } /// Recursively traverses the expression calculating the origin of the requested /// byte of the given value. Returns std::nullopt if the provider can't be /// calculated. /// /// For all the values except the root of the expression, we verify that the /// value has exactly one use and if not then return std::nullopt. This way if /// the origin of the byte is returned it's guaranteed that the values which /// contribute to the byte are not used outside of this expression. /// However, there is a special case when dealing with vector loads -- we allow /// more than one use if the load is a vector type. Since the values that /// contribute to the byte ultimately come from the ExtractVectorElements of the /// Load, we don't care if the Load has uses other than ExtractVectorElements, /// because those operations are independent from the pattern to be combined. /// For vector loads, we simply care that the ByteProviders are adjacent /// positions of the same vector, and their index matches the byte that is being /// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex /// is the index used in an ExtractVectorElement, and \p StartingIndex is the /// byte position we are trying to provide for the LoadCombine. If these do /// not match, then we can not combine the vector loads. \p Index uses the /// byte position we are trying to provide for and is matched against the /// shl and load size. The \p Index algorithm ensures the requested byte is /// provided for by the pattern, and the pattern does not over provide bytes. /// /// /// The supported LoadCombine pattern for vector loads is as follows /// or /// / \ /// or shl /// / \ | /// or shl zext /// / \ | | /// shl zext zext EVE* /// | | | | /// zext EVE* EVE* LOAD /// | | | /// EVE* LOAD LOAD /// | /// LOAD /// /// *ExtractVectorElement using SDByteProvider = ByteProvider<SDNode *>; static std::optional<SDByteProvider> calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional<uint64_t> VectorIndex, unsigned StartingIndex = 0) { … } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { … } static unsigned bigEndianByteAt(unsigned BW, unsigned i) { … } // Check if the byte offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and std::nullopt if match failed. static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { … } // Look through one layer of truncate or extend. static SDValue stripTruncAndExt(SDValue Value) { … } /// Match a pattern where a wide type scalar value is stored by several narrow /// stores. Fold it into a single store or a BSWAP and a store if the target /// supports it. /// /// Assuming little endian target: /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 0) & 0xFF; /// p[1] = (val >> 8) & 0xFF; /// p[2] = (val >> 16) & 0xFF; /// p[3] = (val >> 24) & 0xFF; /// => /// *((i32)p) = val; /// /// i8 *p = ... /// i32 val = ... /// p[0] = (val >> 24) & 0xFF; /// p[1] = (val >> 16) & 0xFF; /// p[2] = (val >> 8) & 0xFF; /// p[3] = (val >> 0) & 0xFF; /// => /// *((i32)p) = BSWAP(val); SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { … } /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors.
Fold it into a single load or a load /// and a BSWAP if the targets supports it. /// /// Assuming little endian target: /// i8 *a = ... /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) /// => /// i32 val = *((i32)a) /// /// i8 *a = ... /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] /// => /// i32 val = BSWAP(*((i32)a)) /// /// TODO: This rule matches complex patterns with OR node roots and doesn't /// interact well with the worklist mechanism. When a part of the pattern is /// updated (e.g. one of the loads) its direct users are put into the worklist, /// but the root node of the pattern which triggers the load combine is not /// necessarily a direct user of the changed node. For example, once the address /// of t28 load is reassociated load combine won't be triggered: /// t25: i32 = add t4, Constant:i32<2> /// t26: i64 = sign_extend t25 /// t27: i64 = add t2, t26 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64 /// t29: i32 = zero_extend t28 /// t32: i32 = shl t29, Constant:i8<8> /// t33: i32 = or t23, t32 /// As a possible fix visitLoad can check if the load can be a part of a load /// combine pattern and add corresponding OR roots to the worklist. SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { … } // If the target has andn, bsl, or a similar bit-select instruction, // we want to unfold masked merge, with canonical pattern of: // | A | |B| // ((x ^ y) & m) ^ y // | D | // Into: // (x & m) | (y & ~m) // If y is a constant, m is not a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) // If x is a constant, m is a 'not', and the 'andn' does not work with // immediates, we unfold into a different pattern: // (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { … } SDValue DAGCombiner::visitXOR(SDNode *N) { … } /// If we have a shift-by-constant of a bitwise logic op that itself has a /// shift-by-constant operand with identical opcode, we may be able to convert /// that into 2 independent shifts followed by the logic op. This is a /// throughput improvement. static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { … } /// Handle transforms common to the three shifts, when the shift amount is a /// constant. /// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { … } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { … } SDValue DAGCombiner::visitRotate(SDNode *N) { … } SDValue DAGCombiner::visitSHL(SDNode *N) { … } // Transform a right shift of a multiply into a multiply-high. 
// Examples: // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b) // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b) static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { … } // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y)) // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitSRA(SDNode *N) { … } SDValue DAGCombiner::visitSRL(SDNode *N) { … } SDValue DAGCombiner::visitFunnelShift(SDNode *N) { … } SDValue DAGCombiner::visitSHLSAT(SDNode *N) { … } // Given an ABS node, detect the following patterns: // (ABS (SUB (EXTEND a), (EXTEND b))). // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))). // Generates UABD/SABD instruction. SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::visitABS(SDNode *N) { … } SDValue DAGCombiner::visitBSWAP(SDNode *N) { … } SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { … } SDValue DAGCombiner::visitCTLZ(SDNode *N) { … } SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { … } SDValue DAGCombiner::visitCTTZ(SDNode *N) { … } SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { … } SDValue DAGCombiner::visitCTPOP(SDNode *N) { … } static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { … } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { … } /// Generate Min/Max node SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC) { … } /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI) { … } SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { … } template <class MatchContextClass> static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) { … } static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { … } // Match SELECTs with absolute difference patterns. // (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b) // (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b) // (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b) // (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b) SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const SDLoc &DL) { … } SDValue DAGCombiner::visitSELECT(SDNode *N) { … } // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { … } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL) { … } // Fold sext/zext of index into index type.
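// For example, a gather/scatter whose index vector is a sext/zext of a
// narrower index may be able to use the narrower index directly, expressing
// the signedness through ISD::MemIndexType instead of an explicit extend node,
// when the target supports that index type.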
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitVPSCATTER(SDNode *N) { … } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { … } SDValue DAGCombiner::visitMSTORE(SDNode *N) { … } SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) { … } SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) { … } SDValue DAGCombiner::visitVPGATHER(SDNode *N) { … } SDValue DAGCombiner::visitMGATHER(SDNode *N) { … } SDValue DAGCombiner::visitMLOAD(SDNode *N) { … } SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) { … } /// A vector select of 2 constant vectors can be simplified to math/logic to /// avoid a variable select instruction and possibly avoid constant loads. SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { … } SDValue DAGCombiner::visitVP_SELECT(SDNode *N) { … } SDValue DAGCombiner::visitVSELECT(SDNode *N) { … } SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { … } SDValue DAGCombiner::visitSETCC(SDNode *N) { … } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { … } /// Check if N satisfies: /// N is used once. /// N is a Load. /// The load is compatible with ExtOpcode: if the load has an explicit /// zero/sign extension, ExtOpcode must have the same extension; /// otherwise (a plain load) it is considered compatible. static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { … } /// Fold /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y) /// (aext (select c, load x, load y)) -> (select c, extload x, extload y) /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level) { … } /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). /// Vector extends are not folded if operations are legal; this is to /// avoid introducing illegal build_vector dag nodes. static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes) { … } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if the extension is possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { … } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue OrigLoad, SDValue ExtLoad, ISD::NodeType ExtType) { … } // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) { … } // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { … } /// If we're narrowing or widening the result of a vector select and the final /// size is the same size as a setcc (compare) feeding the select, then try to /// apply the cast operation to the select's operands because matching vector /// sizes for a select condition and other operands should be more efficient. SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { … } // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType) { … } // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) // Only generate vector extloads when 1) they're legal, and 2) they are // deemed desirable by the target. NonNegZExt can be set to true if a zero // extend has the nonneg flag to allow use of sextload if profitable. static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt = false) { … } static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { … } // fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load))) static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType) { … } static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::foldSextSetcc(SDNode *N) { … } SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { … } /// Given an extending node with a pop-count operand, if the target does not /// support a pop-count in the narrow source type but does support it in the /// destination type, widen the pop-count to the destination type. static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) { … } // If we have (zext (abs X)) where X is a type that will be promoted by type // legalization, convert to (abs (sext X)). But don't extend past a legal type. static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitAssertExt(SDNode *N) { … } SDValue DAGCombiner::visitAssertAlign(SDNode *N) { … } /// If the result of a load is shifted/masked/truncated to an effectively /// narrower type, try to transform the load to a narrower type and/or /// use an extending load. 
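/// For example, assuming the narrower access is legal and desirable on the
/// target (little-endian shown):
///   (i32 (trunc (srl (i64 (load p)), 32)))  ->  (i32 (load p+4))
///   (and (i32 (load p)), 255)               ->  (i32 (zextload p, i8))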
SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { … } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { … } static SDValue foldExtendVectorInregToExtendOfSubvector( SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations) { … } SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { … } SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) { … } /// Detect patterns of truncation with unsigned saturation: /// /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// Return the source value x to be truncated or SDValue() if the pattern was /// not matched. /// static SDValue detectUSatUPattern(SDValue In, EVT VT) { … } /// Detect patterns of truncation with signed saturation: /// (truncate (smin (smax (x, signed_min_of_dest_type), /// signed_max_of_dest_type)) to dest_type) /// or: /// (truncate (smax (smin (x, signed_max_of_dest_type), /// signed_min_of_dest_type)) to dest_type). /// /// Return the source value to be truncated or SDValue() if the pattern was not /// matched. static SDValue detectSSatSPattern(SDValue In, EVT VT) { … } /// Detect patterns of truncation with unsigned saturation: static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL) { … } static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { … } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { … } /// build_pair (load, load) -> load /// if load locations are consecutive. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { … } static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { … } SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { … } SDValue DAGCombiner::visitBITCAST(SDNode *N) { … } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { … } SDValue DAGCombiner::visitFREEZE(SDNode *N) { … } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { … } // Returns true if floating point contraction is allowed on the FMUL-SDValue // `N` static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { … } // Returns true if `N` can assume no infinities involved in its computation. static bool hasNoInfs(const TargetOptions &Options, SDValue N) { … } /// Try to perform FMA combining on a given FADD node. template <class MatchContextClass> SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { … } /// Try to perform FMA combining on a given FSUB node. template <class MatchContextClass> SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { … } /// Try to perform FMA combining on a given FMUL node based on the distributive /// law x * (y + 1) = x * y + x and variants thereof (commuted versions, /// subtraction instead of addition). 
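/// For example (subject to the usual FMA legality and profitability checks):
///   (fmul x, (fadd y, 1.0)) -> (fma x, y, x)
///   (fmul x, (fsub y, 1.0)) -> (fma x, y, (fneg x))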
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { … } SDValue DAGCombiner::visitVP_FADD(SDNode *N) { … } SDValue DAGCombiner::visitFADD(SDNode *N) { … } SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) { … } SDValue DAGCombiner::visitFSUB(SDNode *N) { … } // Transform IEEE Floats: // (fmul C, (uitofp Pow2)) // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) // (fdiv C, (uitofp Pow2)) // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) // // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so // there is no need for more than an add/sub. // // This is valid under the following circumstances: // 1) We are dealing with IEEE floats // 2) C is normal // 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds. // TODO: Much of this could also be used for generating `ldexp` on targets that // prefer it. SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) { … } SDValue DAGCombiner::visitFMUL(SDNode *N) { … } template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { … } SDValue DAGCombiner::visitFMAD(SDNode *N) { … } // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) // Notice that this is not always beneficial. One reason is that different // targets may have different costs for FDIV and FMUL, so sometimes the cost of // two FDIVs may be lower than the cost of one FDIV and two FMULs. Another // reason is that the critical path is increased from "one FDIV" to "one FDIV + // one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { … } SDValue DAGCombiner::visitFDIV(SDNode *N) { … } SDValue DAGCombiner::visitFREM(SDNode *N) { … } SDValue DAGCombiner::visitFSQRT(SDNode *N) { … } /// copysign(x, fp_extend(y)) -> copysign(x, y) /// copysign(x, fp_round(y)) -> copysign(x, y) /// Operands to the functions are the type of X and Y respectively. static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) { … } static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { … } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { … } SDValue DAGCombiner::visitFPOW(SDNode *N) { … } static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { … } SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { … } // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { … } SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { … } SDValue DAGCombiner::visitXROUND(SDNode *N) { … } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { … } SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { … } SDValue DAGCombiner::visitFCEIL(SDNode *N) { … } SDValue DAGCombiner::visitFTRUNC(SDNode *N) { … } SDValue DAGCombiner::visitFFREXP(SDNode *N) { … } SDValue DAGCombiner::visitFFLOOR(SDNode *N) { … } SDValue DAGCombiner::visitFNEG(SDNode *N) { … } SDValue DAGCombiner::visitFMinMax(SDNode *N) { … } SDValue DAGCombiner::visitFABS(SDNode *N) { … } SDValue DAGCombiner::visitBRCOND(SDNode *N) { … } SDValue DAGCombiner::rebuildSetCC(SDValue N) { … } // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
SDValue DAGCombiner::visitBR_CC(SDNode *N) { … } static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI) { … } /// Try turning a load/store into a pre-indexed load/store when the base /// pointer is an add or subtract and it has other uses besides the load/store. /// After the transformation, the new indexed load/store has effectively folded /// the add/subtract in and all of its other uses are redirected to the /// new load/store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { … } static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI) { … } static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI) { … } /// Try to combine a load/store with an add/sub of the base pointer node into a /// post-indexed load/store. The transformation effectively folds the /// add/subtract into the new indexed load/store, and all of its uses are /// redirected to the new load/store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { … } /// Return the base-pointer arithmetic from an indexed \p LD. SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { … } static inline ElementCount numVectorEltsOrZero(EVT T) { … } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { … } bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { … } StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset) { … } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { … } SDValue DAGCombiner::visitLOAD(SDNode *N) { … } namespace { /// Helper structure used to slice a load into smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// /// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { … }; } // end anonymous namespace /// Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { … } /// Check whether or not \p First and \p Second are next to each other /// in memory. This means that there is no hole between the bits loaded /// by \p First and the bits loaded by \p Second. static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second) { … } /// Adjust the \p GlobalLSCost according to the target /// pairing capabilities and the layout of the slices. /// \pre \p GlobalLSCost should account for at least as many loads as /// there are in the slices in \p LoadedSlices. static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, LoadedSlice::Cost &GlobalLSCost) { … } /// Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there are exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
/// /// Note: The order of the elements in \p LoadedSlices may be modified, but not /// the elements themselves. /// /// FIXME: When the cost model will be mature enough, we can relax /// constraints (1) and (2). static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, const APInt &UsedBits, bool ForCodeSize) { … } /// If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. /// This slicing takes care not to insert overlapping loads. /// \pre LI is a simple load (i.e., not an atomic or volatile load). bool DAGCombiner::SliceUpLoad(SDNode *N) { … } /// Check to see if V is (and load (ptr), imm), where the load is having /// specific bytes cleared out. If so, return the byte size being masked out /// and the shift amount. static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { … } /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { … } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try /// narrowing the load and store if it would end up being a win for performance /// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { … } /// For a given floating point load / store pair, if the load value isn't used /// by any other operations, then consider transforming the pair to integer /// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { … } // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), // and ConstNode is c2. // // If the (add x, c1) has multiple uses, we could increase // the number of adds if we make this transformation. // It would only be worth doing this if we can remove a // multiply in the process. Check for that here. // To illustrate: // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, SDValue ConstNode) { … } SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { … } bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) { … } bool DAGCombiner::mergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc) { … } void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&RootNode) { … } // We need to check that merging these stores does not cause a loop in the // DAG. Any store candidate may depend on another candidate indirectly through // its operands. Check in parallel by searching up from operands of candidates. 
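// For example, if the value stored by one candidate is (directly or through a
// chain of operands) computed from a load that depends on the chain of another
// candidate store, merging the two would make the combined store an operand of
// itself, i.e. introduce a cycle.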
bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode) { … } unsigned DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, int64_t ElementSizeBytes) const { … } bool DAGCombiner::tryStoreMergeOfConstants( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors) { … } bool DAGCombiner::tryStoreMergeOfExtracts( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode) { … } bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, EVT MemVT, SDNode *RootNode, bool AllowVectors, bool IsNonTemporalStore, bool IsNonTemporalLoad) { … } bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { … } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { … } SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { … } // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset) // // If a store of a load with an element inserted into it has no other // uses in between (on the chain), then we can consider the vector store // dead and replace it with just the single scalar element store. SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { … } SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) { … } SDValue DAGCombiner::visitSTORE(SDNode *N) { … } SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { … } /// For the store instruction sequence below, the F and I values /// are bundled together as an i64 value before being stored into memory. /// Sometimes it is more efficient to generate separate stores for F and I, /// which can remove the bitwise instructions or sink them to colder places. /// /// (store (or (zext (bitcast F to i32) to i64), /// (shl (zext I to i64), 32)), addr) --> /// (store F, addr) and (store I, addr+4) /// /// Similarly, splitting other merged stores can also be beneficial, for example: /// For pair of {i32, i32}, i64 store --> two i32 stores. /// For pair of {i32, i16}, i64 store --> two i32 stores. /// For pair of {i16, i16}, i32 store --> two i16 stores. /// For pair of {i16, i8}, i32 store --> two i16 stores. /// For pair of {i8, i8}, i16 store --> two i8 stores. /// /// We allow each target to determine specifically which kind of splitting is /// supported. /// /// The store patterns are commonly seen in the simple code snippet below /// when only std::make_pair(...) is SROA-transformed before being inlined into hoo. /// void goo(const std::pair<int, float> &); /// void hoo() { /// ... /// goo(std::make_pair(tmp, ftmp)); /// ... /// } /// SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { … } // Merge an insertion into an existing shuffle: // (insert_vector_elt (vector_shuffle X, Y, Mask), // (extract_vector_elt X, N), InsIndex) // --> (vector_shuffle X, Y, NewMask) // and variations where shuffle operands may be CONCAT_VECTORS. static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask, SmallVectorImpl<int> &NewMask, SDValue Elt, unsigned InsIndex) { … } // Merge an insertion into an existing shuffle: // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), // InsIndex) // --> (vector_shuffle X, Y) and variations where shuffle operands may be // CONCAT_VECTORS.
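// As an illustrative example with 4-element vectors:
//   (insert_vector_elt (vector_shuffle X, Y, <0,4,2,6>),
//                      (extract_vector_elt X, 1), 2)
//   --> (vector_shuffle X, Y, <0,4,1,6>)
// since writing element 1 of X into lane 2 is the same as selecting mask
// element 1 (an X element) for that lane.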
SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) { … } // Convert a disguised subvector insertion into a shuffle: // insert_vector_elt V, (bitcast X from vector type), IdxC --> // bitcast(shuffle (bitcast V), (extended X), Mask) // Note: We do not use an insert_subvector node because that requires a // legal subvector type. SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { … } // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if // possible and the new load will be fast. We use more loads but fewer shuffles // and inserts. SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) { … } SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { … } SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { … } /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations) { … } // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, // recursively analyse all of its users and try to model them as // bit sequence extractions. If all of them agree on the new, narrower element // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that // new element type, do so now. // This is mainly useful to recover from legalization that scalarized // the vector as wide elements, but tries to rebuild it with narrower elements. // // Some more nodes could be modelled if that helps cover interesting patterns. bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts( SDNode *N) { … } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { … } // Simplify (build_vec (ext )) to (bitcast (build_vec )) SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { … } // Simplify (build_vec (trunc $1) // (trunc (srl $1 half-width)) // (trunc (srl $1 (2 * half-width)))) // to (bitcast $1) SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { … } SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec) { … } static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { … } // FIXME: promote to STLExtras. template <typename R, typename T> static auto getFirstIndexOf(R &&Range, const T &Val) { … } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If the types of the vectors we're extracting from allow it, // turn this into a vector_shuffle node. SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { … } // Try to turn a build vector of zero extends of extract vector elts into a // vector zero extend and possibly an extract subvector. // TODO: Support sign extend? // TODO: Allow undef elements? SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { … } // If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND, // and all other elements being constant zeros, granularize the BUILD_VECTOR's // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op. // This pattern can appear during legalization.
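//
// As an illustrative example (little-endian lane order assumed):
//   (v2i64 build_vector (i64 zero_extend (i32 %x)), (i64 0))
//   --> (v2i64 bitcast (v4i32 build_vector %x, 0, 0, 0))
// The ZERO_EXTEND is absorbed because the high half of the first i64 lane is
// already known to be zero.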
// // NOTE: This can be generalized to allow more than a single // non-constant-zero op, UNDEFs, and to be KnownBits-based. SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) { … } SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { … } static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { … } // Attempt to merge nested concat_vectors/undefs. // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG) { … } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this // into a legal shuffle. static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { … } static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { … } // See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of // the operands is a SHUFFLE_VECTOR, and all other operands are also operands // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR. static SDValue combineConcatVectorOfShuffleAndItsOperands( SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations) { … } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { … } // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { … } static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { … } /// If we are extracting a subvector produced by a wide binary operator, try /// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations) { … } /// If we are extracting a subvector from a wide vector load, convert to a /// narrow load to eliminate the extraction: /// (extract_subvector (load wide vector)) --> (load narrow vector) static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { … } /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?), /// EXTRACT_SUBVECTOR(Op?, ?), /// Mask') /// iff it is legal and profitable to do so. Notably, the trimmed mask /// (containing only the elements that are extracted) /// must reference at most two subvectors. static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { … } /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles /// followed by concatenation. Narrow vector ops may have better performance /// than wide ops, and this can unlock further narrowing of other vector ops. /// Targets can invert this transform later if it is not profitable. static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
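// As an illustrative example, with A, B, C, D all v2i32:
//   shuffle (concat_vectors A, B), (concat_vectors C, D), <2,3,4,5>
//   --> concat_vectors B, C
// since mask elements <2,3> select exactly B and <4,5> select exactly C.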
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { … } // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. // // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always // a simplification in some sense, but it isn't appropriate in general: some // BUILD_VECTORs are substantially cheaper than others. The general case // of a BUILD_VECTOR requires inserting each element individually (or // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of // all constants is a single constant pool load. A BUILD_VECTOR where each // element is identical is a splat. A BUILD_VECTOR where most of the operands // are undef lowers to a small number of element insertions. // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. // We don't fold shuffles where one side is a non-zero constant, and we don't // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI) { … } // Match shuffles that can be converted to *_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)), // and returns the EVT to which the extension should be performed. // NOTE: this assumes that the src is the first operand of the shuffle. static std::optional<EVT> canCombineShuffleToExtendVectorInreg( unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations) { … } // Match shuffles that can be converted to any_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } // Match shuffles that can be converted to zero_extend_vector_inreg. // This is often generated during legalization. // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src)) static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of // each source element of a large type into the lowest elements of a smaller // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { … } // Combine shuffles of splat-shuffles of the form: // shuffle (shuffle V, undef, splat-mask), undef, M // If splat-mask contains undef elements, we need to be careful about // introducing undef's in the folded mask which are not the result of composing // the masks of the shuffles. static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing // the mask can be treated as a larger type. 
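// As an illustrative example:
//   shuffle (v8i16 bitcast (v4i32 X)), (v8i16 bitcast (v4i32 Y)),
//           <0,1,8,9,2,3,10,11>
//   --> (v8i16 bitcast (shuffle X, Y, <0,4,1,5>))
// because each consecutive pair of i16 mask elements moves one whole i32 lane.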
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations) { … } /// Combine a shuffle of a shuffle of the form: /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG) { … } /// If the shuffle mask is taking exactly one element from the first vector /// operand and passing through all other elements from the second vector /// operand, return the index of the mask element that is choosing an element /// from the first operand. Otherwise, return -1. static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { … } /// If a shuffle inserts exactly one element from a source vector operand into /// another vector operand and we can access the specified element as a scalar, /// then we can eliminate the shuffle. static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { … } /// If we have a unary shuffle of a shuffle, see if it can be folded away /// completely. This has the potential to lose undef knowledge because the first /// shuffle may not have an undef mask element where the second one does. So /// only call this after doing simplifications based on demanded elements. static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) { … } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { … } SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { … } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { … } SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { … } SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) { … } SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { … } SDValue DAGCombiner::visitVP_FSUB(SDNode *N) { … } SDValue DAGCombiner::visitVPOp(SDNode *N) { … } SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) { … } SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) { … } /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { … } /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes) { … } /// Visit a vector cast operation, like FP_EXTEND. SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { … } /// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { … } SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2) { … } /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values /// being selected between, see if we can simplify the select. Callers of this /// should assume that TheSelect is deleted if this returns true. As such, they /// should return the appropriate thing (e.g. the node) back to the top-level of /// the DAG combiner loop to avoid it being looked at. bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { … } /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and /// bitwise 'and'.
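/// As an illustrative example, for an i32 value X:
///   (X < 0) ? 8 : 0  -->  (and (srl X, 28), 8)
/// The sign bit is shifted down to bit position 3 (since 8 == 1 << 3) and the
/// 'and' clears every other bit.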
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { … } // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc. SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { … } // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values. SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) { … } /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))" /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 /// in it. This may be a win when the constant is not otherwise available /// because it replaces two constant pool loads with one. SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { … } /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { … } /// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans) { … } /// Given an ISD::SDIV node expressing a divide by constant, return /// a DAG expression to select that will generate the same value by multiplying /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { … } /// Given an ISD::SDIV node expressing a divide by a constant power of 2, return a /// DAG expression that will generate the same value by right shifting. SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { … } /// Given an ISD::UDIV node expressing a divide by constant, return a DAG /// expression that will generate the same value by multiplying by a magic /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { … } /// Given an ISD::SREM node expressing a remainder by a constant power of 2, /// return a DAG expression that will generate the same value. SDValue DAGCombiner::BuildSREMPow2(SDNode *N) { … } // This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp // // Returns the node that represents `Log2(Op)`. This may create a new node. If // we are unable to compute `Log2(Op)`, this returns `SDValue()`. // // All nodes will be created at `DL` and the output will be of type `VT`. // // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set // `AssumeNonZero` if this function should simply assume (rather than require proving) // that `Op` is non-zero. static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero) { … } /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
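/// As an illustrative example, for i32 V = 16: ctlz(16) = 27, so
/// LogBase2(16) = 31 - 27 = 4, matching 16 == 1 << 4.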
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL, bool KnownNonZero, bool InexpensiveOnly, std::optional<EVT> OutVT) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: /// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] /// For the last iteration, put numerator N into it to gain more precision: /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { … } /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal sqrt, we need to find the zero of the function: /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, SDNodeFlags Flags, bool Reciprocal) { … } /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case, /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { … } SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { … } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { … } /// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { … } /// Walk up the chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases) { … } /// Walk up the chain skipping non-aliasing memory nodes, looking for a better chain /// (an aliasing node). SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { … } // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store // node, so why not let the work be done on each store as it's visited? // // I believe this is mainly important because mergeConsecutiveStores // is unable to deal with merging stores of different sizes, so unless // we improve the chains of all the potential candidates up-front // before running mergeConsecutiveStores, it might only see some of // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { … } bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { … } /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOptLevel OptLevel) { … }