//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#include "X86ISelDAGToDAG.h"
#include "X86.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE …
#define PASS_NAME …

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

static cl::opt<bool> AndImmShrink(
    "x86-and-imm-shrink", cl::init(true),
    cl::desc("Enable setting constant bits to reduce size of mask immediates"),
    cl::Hidden);

static cl::opt<bool> EnablePromoteAnyextLoad(
    "x86-promote-anyext-load", cl::init(true),
    cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);

extern cl::opt<bool> IndirectBranchTracking;

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
/// This corresponds to X86AddressMode, but uses SDValue's instead of register
/// numbers for the leaves of the matched tree.
struct X86ISelAddressMode { … };
}

namespace {
//===--------------------------------------------------------------------===//
/// ISel - X86-specific code to select X86 machine instructions for
///        SelectionDAG operations.
///
class X86DAGToDAGISel final : public SelectionDAGISel { … };

class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy { … };
}

char X86DAGToDAGISelLegacy::ID = …;

INITIALIZE_PASS(…)

// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { … }

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { … }

bool X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
                                         SDNode *Root) const { … }

// Indicates it is profitable to form an AVX512 masked operation. Returning
// false will favor a register-register masked move or vblendm and the
// operation will be selected separately.
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { … }
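// Illustrative sketch only (not part of the selector): every address the
// X86ISelAddressMode above can represent boils down to the x86 memory-operand
// shape
//
//   Segment : [Base + Scale * Index + Disp]
//
// where Scale is 1, 2, 4 or 8 and Disp is a sign-extended 32-bit displacement
// or a symbol. Assuming a simple access such as
//
//   int get(int *a, long i) {
//     return a[i + 3];
//   }
//
// the goal of the matching code below is to fold the whole computation into a
// single operand,
//
//   movl 12(%rdi,%rsi,4), %eax    # Base = a, Index = i, Scale = 4, Disp = 12
//
// rather than materializing the add and the multiply as separate instructions.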
/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) { … }

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { … }

static bool isEndbrImm64(uint64_t Imm) { … }

static bool needBWI(MVT VT) { … }

void X86DAGToDAGISel::PreprocessISelDAG() { … }

// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { … }

void X86DAGToDAGISel::PostprocessISelDAG() { … }

/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() { … }

void X86DAGToDAGISel::emitFunctionEntryCode() { … }

static bool isDispSafeForFrameIndex(int64_t Val) { … }

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                                         bool AllowSegmentRegForX32) { … }

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an
/// addressing mode. These wrap things that will resolve down into a symbol
/// reference. If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { … }

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) { … }

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { … }

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask, SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) { … }

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        X86ISelAddressMode &AM) { … }
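// Worked instance of the transform above (illustrative values, not taken from
// a particular test): with C1 = 2 and C2 = 0x3C, "(X << 2) & 0x3C" equals
// "(X & 0xF) << 2" because 0x3C >> 2 == 0xF and the low two bits are zero in
// both forms. Once rewritten, the "<< 2" is free: it becomes a scale of 4 in
// the addressing mode, e.g.
//
//   movl (%rsi,%rcx,4), %eax      # %rcx already holds X & 0xF
//
// instead of a separate shift feeding an address with scale 1.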
// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask, SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) { … }

// Transform "(X >> SHIFT) & (MASK << C1)" to
// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
// matched to a BEXTR later. Returns false if the simplification is performed.
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
                                   uint64_t Mask, SDValue Shift, SDValue X,
                                   X86ISelAddressMode &AM,
                                   const X86Subtarget &Subtarget) { … }

// Attempt to peek further into a scaled index register, collecting additional
// extensions / offsets / etc. Returns \p N if we can't peek any further.
SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
                                               X86ISelAddressMode &AM,
                                               unsigned Depth) { … }

bool X86DAGToDAGISel::matchAddressRecursively(SDValue N,
                                              X86ISelAddressMode &AM,
                                              unsigned Depth) { … }

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
                                                    X86ISelAddressMode &AM,
                                                    unsigned Depth) { … }

/// Helper for selectVectorAddr. Handles things that can be folded into a
/// gather/scatter address. The index register and scale should have already
/// been handled.
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
                                       SDValue IndexOp, SDValue ScaleOp,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) { … }

/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index, SDValue &Disp,
                                 SDValue &Segment) { … }

bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { … }

bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) { … }

/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) { … }
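// Example of the trade-off selectLEAAddr is making (assumed codegen, shown
// only for orientation): an address-shaped computation such as
//
//   long f(long x, long y) {
//     return x + 4 * y + 8;
//   }
//
// can be emitted as one three-operand LEA,
//
//   leaq 8(%rdi,%rsi,4), %rax
//
// whereas an ADD-based sequence needs more instructions but produces EFLAGS.
// The cost check above only reports a match when the LEA form is worth it.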
/// This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) { … }

bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { … }

bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) { … }

bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) { … }

/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() { … }

bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width,
                                              SDNode *N) const { … }

X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { … }

/// Test whether the given X86ISD::CMP node has any users that use a flag
/// other than ZF.
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { … }

/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// flag to be accurate.
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { … }

static bool mayUseCarryFlag(X86::CondCode CC) { … }

/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { … }

/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal,
                                        SelectionDAG *CurDAG,
                                        unsigned LoadOpNo,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) { … }

// Change a chain of {load; op; store} of the same value into a simple op
// through memory of that value, if the uses of the modified value and its
// address are suitable.
//
// The tablegen memory operand pattern is currently not able to match the case
// where the EFLAGS on the original operation are used.
//
// To move this to tablegen, we'll need to improve tablegen to allow flags to
// be transferred from a node in the pattern to the result node, probably with
// a new keyword. For example, we have this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (implicit EFLAGS)]>;
// but maybe need something like this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (transferrable EFLAGS)]>;
//
// Until then, we manually fold these and instruction select the operation
// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { … }

// See if this is an X & Mask that we can match to BEXTR/BZHI.
// Where Mask is one of the following patterns:
//   a) x &  (1 << nbits) - 1
//   b) x & ~(-1 << nbits)
//   c) x &  (-1 >> (32 - y))
//   d) x << (32 - y) >> (32 - y)
//   e) (1 << nbits) - 1
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { … }

// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { … }
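// Illustrative example for the two matchers above (BEXTR control layout as
// defined by BMI1, not derived from the code here): a bit-field extract such
// as
//
//   unsigned field(unsigned x) {
//     return (x >> 5) & 0x7F;     // start = 5, length = 7
//   }
//
// maps onto BEXTR with a control value packing the start in bits [7:0] and
// the length in bits [15:8], i.e. 5 | (7 << 8) = 0x0705:
//
//   movl $0x705, %eax
//   bextrl %eax, %edi, %eax
//
// replacing the shift plus AND-with-immediate pair.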
// Emit a PCMPISTR(I/M) instruction.
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node) { … }

// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we
// need to emit a second instruction after this one. This is needed since we
// have two copyToReg nodes glued before this and we need to continue that
// glue through.
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node,
                                             SDValue &InGlue) { … }

bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { … }

bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { … }

bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
                                     SDNode *ParentB, SDNode *ParentC,
                                     SDValue A, SDValue B, SDValue C,
                                     uint8_t Imm) { … }

// Try to match two logic ops to a VPTERNLOG.
// FIXME: Handle more complex patterns that use an operand more than once?
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { … }

/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
/// positive one. This reverses a transform in SimplifyDemandedBits that
/// shrinks mask constants by clearing bits. There is also a possibility that
/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
/// case, just replace the 'and'. Return 'true' if the node is replaced.
bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { … }

static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
                              bool FoldedBCast, bool Masked) { … }

// Try to create VPTESTM instruction. If InMask is not null, it will be used
// to form a masked operation.
bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
                                 SDValue InMask) { … }

// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
// into vpternlog.
bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { … }

void X86DAGToDAGISel::Select(SDNode *Node) { … }

bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) { … }

X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM) : … { … }

/// This pass converts a legalized DAG into a X86-specific DAG,
/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     CodeGenOptLevel OptLevel) { … }
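// Typical use of createX86ISelDag (sketch only; assumes the usual
// X86PassConfig boilerplate that lives in X86TargetMachine.cpp):
//
//   bool X86PassConfig::addInstSelector() {
//     // Install an ISel pass instance for this target and opt level.
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }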