//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#include "X86ISelDAGToDAG.h"
#include "X86.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE …
#define PASS_NAME …

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

static cl::opt<bool> AndImmShrink(
    "x86-and-imm-shrink", cl::init(true),
    cl::desc("Enable setting constant bits to reduce size of mask immediates"),
    cl::Hidden);

static cl::opt<bool> EnablePromoteAnyextLoad(
    "x86-promote-anyext-load", cl::init(true),
    cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);

extern cl::opt<bool> IndirectBranchTracking;

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
/// This corresponds to X86AddressMode, but uses SDValue's instead of register
/// numbers for the leaves of the matched tree.
struct X86ISelAddressMode { … };
}

namespace {
//===--------------------------------------------------------------------===//
/// ISel - X86-specific code to select X86 machine instructions for
///        SelectionDAG operations.
///
class X86DAGToDAGISel final : public SelectionDAGISel { … };

class X86DAGToDAGISelLegacy : public SelectionDAGISelLegacy { … };
}

char X86DAGToDAGISelLegacy::ID = …;

INITIALIZE_PASS(…)

// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { … }

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { … }

bool X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
                                         SDNode *Root) const { … }

// Indicates it is profitable to form an AVX512 masked operation. Returning
// false will favor a register-register masked move or vblendm and the
// operation will be selected separately.
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { … }
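// Illustrative sketch only (not part of the selector): every address the
// X86ISelAddressMode above can represent boils down to the x86 memory-operand
// shape
//
//   Segment : [Base + Scale * Index + Disp]
//
// where Scale is 1, 2, 4 or 8 and Disp is a sign-extended 32-bit displacement
// or a symbol. Assuming a simple access such as
//
//   int get(int *a, long i) {
//     return a[i + 3];
//   }
//
// the goal of the matching code below is to fold the whole computation into a
// single operand,
//
//   movl 12(%rdi,%rsi,4), %eax    # Base = a, Index = i, Scale = 4, Disp = 12
//
// rather than materializing the add and the multiply as separate instructions.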
/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) { … }

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { … }

static bool isEndbrImm64(uint64_t Imm) { … }

static bool needBWI(MVT VT) { … }

void X86DAGToDAGISel::PreprocessISelDAG() { … }

// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { … }

void X86DAGToDAGISel::PostprocessISelDAG() { … }

/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() { … }

void X86DAGToDAGISel::emitFunctionEntryCode() { … }

static bool isDispSafeForFrameIndex(int64_t Val) { … }

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                                         bool AllowSegmentRegForX32) { … }

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an
/// addressing mode. These wrap things that will resolve down into a symbol
/// reference. If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { … }

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) { … }

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { … }

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask, SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) { … }

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        X86ISelAddressMode &AM) { … }
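// Worked instance of the transform above (illustrative values, not taken from
// a particular test): with C1 = 2 and C2 = 0x3C, "(X << 2) & 0x3C" equals
// "(X & 0xF) << 2" because 0x3C >> 2 == 0xF and the low two bits are zero in
// both forms. Once rewritten, the "<< 2" is free: it becomes a scale of 4 in
// the addressing mode, e.g.
//
//   movl (%rsi,%rcx,4), %eax      # %rcx already holds X & 0xF
//
// instead of a separate shift feeding an address with scale 1.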
// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask, SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) { … }

// Transform "(X >> SHIFT) & (MASK << C1)" to
// "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
// matched to a BEXTR later. Returns false if the simplification is performed.
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
                                   uint64_t Mask, SDValue Shift, SDValue X,
                                   X86ISelAddressMode &AM,
                                   const X86Subtarget &Subtarget) { … }

// Attempt to peek further into a scaled index register, collecting additional
// extensions / offsets / etc. Returns \p N if we can't peek any further.
SDValue X86DAGToDAGISel::matchIndexRecursively(SDValue N,
                                               X86ISelAddressMode &AM,
                                               unsigned Depth) { … }

bool X86DAGToDAGISel::matchAddressRecursively(SDValue N,
                                              X86ISelAddressMode &AM,
                                              unsigned Depth) { … }

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
                                                    X86ISelAddressMode &AM,
                                                    unsigned Depth) { … }

/// Helper for selectVectorAddr. Handles things that can be folded into a
/// gather/scatter address. The index register and scale should have already
/// been handled.
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { … }

bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
                                       SDValue IndexOp, SDValue ScaleOp,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) { … }

/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index, SDValue &Disp,
                                 SDValue &Segment) { … }

bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { … }

bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) { … }

/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) { … }
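// Example of the trade-off selectLEAAddr is making (assumed codegen, shown
// only for orientation): an address-shaped computation such as
//
//   long f(long x, long y) {
//     return x + 4 * y + 8;
//   }
//
// can be emitted as one three-operand LEA,
//
//   leaq 8(%rdi,%rsi,4), %rax
//
// whereas an ADD-based sequence needs more instructions but produces EFLAGS.
// The cost check above only reports a match when the LEA form is worth it.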
/// This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) { … }

bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { … }

bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) { … }

bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) { … }

/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() { … }

bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width,
                                              SDNode *N) const { … }

X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const { … }

/// Test whether the given X86ISD::CMP node has any users that use a flag
/// other than ZF.
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { … }

/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// flag to be accurate.
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { … }

static bool mayUseCarryFlag(X86::CondCode CC) { … }

/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { … }

/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal,
                                        SelectionDAG *CurDAG,
                                        unsigned LoadOpNo,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) { … }

// Change a chain of {load; op; store} of the same value into a simple op
// through memory of that value, if the uses of the modified value and its
// address are suitable.
//
// The tablegen memory operand pattern is currently not able to match the case
// where the EFLAGS on the original operation are used.
//
// To move this to tablegen, we'll need to improve tablegen to allow flags to
// be transferred from a node in the pattern to the result node, probably with
// a new keyword. For example, we have this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (implicit EFLAGS)]>;
// but maybe need something like this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (transferrable EFLAGS)]>;
//
// Until then, we manually fold these and instruction select the operation
// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { … }

// See if this is an X & Mask that we can match to BEXTR/BZHI.
// Where Mask is one of the following patterns:
//   a) x &  (1 << nbits) - 1
//   b) x & ~(-1 << nbits)
//   c) x &  (-1 >> (32 - y))
//   d) x << (32 - y) >> (32 - y)
//   e) (1 << nbits) - 1
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { … }

// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { … }
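// Illustrative example for the two matchers above (BEXTR control layout as
// defined by BMI1, not derived from the code here): a bit-field extract such
// as
//
//   unsigned field(unsigned x) {
//     return (x >> 5) & 0x7F;     // start = 5, length = 7
//   }
//
// maps onto BEXTR with a control value packing the start in bits [7:0] and
// the length in bits [15:8], i.e. 5 | (7 << 8) = 0x0705:
//
//   movl $0x705, %eax
//   bextrl %eax, %edi, %eax
//
// replacing the shift plus AND-with-immediate pair.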
// Emit a PCMPISTR(I/M) instruction.
MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node) { … }

// Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we
// need to emit a second instruction after this one. This is needed since we
// have two copyToReg nodes glued before this and we need to continue that
// glue through.
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
                                             bool MayFoldLoad, const SDLoc &dl,
                                             MVT VT, SDNode *Node,
                                             SDValue &InGlue) { … }

bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { … }

bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { … }

bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
                                     SDNode *ParentB, SDNode *ParentC,
                                     SDValue A, SDValue B, SDValue C,
                                     uint8_t Imm) { … }

// Try to match two logic ops to a VPTERNLOG.
// FIXME: Handle more complex patterns that use an operand more than once?
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { … }

/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
/// positive one. This reverses a transform in SimplifyDemandedBits that
/// shrinks mask constants by clearing bits. There is also a possibility that
/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
/// case, just replace the 'and'. Return 'true' if the node is replaced.
bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { … }

static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
                              bool FoldedBCast, bool Masked) { … }

// Try to create VPTESTM instruction. If InMask is not null, it will be used
// to form a masked operation.
bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
                                 SDValue InMask) { … }

// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
// into vpternlog.
bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { … }

void X86DAGToDAGISel::Select(SDNode *Node) { … }

bool X86DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) { … }

X86ISelDAGToDAGPass::X86ISelDAGToDAGPass(X86TargetMachine &TM) : … { … }

/// This pass converts a legalized DAG into a X86-specific DAG,
/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     CodeGenOptLevel OptLevel) { … }
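// Typical use of createX86ISelDag (sketch only; assumes the usual
// X86PassConfig boilerplate that lives in X86TargetMachine.cpp):
//
//   bool X86PassConfig::addInstSelector() {
//     // Install an ISel pass instance for this target and opt level.
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }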