//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// X86 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
/// A note about the cost model numbers used below: rather than modeling a
/// specific CPU, the numbers correspond to a "generic" X86 CPU. Usually they
/// reflect the CPU on which the feature first appeared. For example, if we
/// check Subtarget.hasSSE42() in the lookups below, the cost is based on
/// Nehalem, the first CPU to support that feature level and thus most likely
/// the one with the worst-case cost, although we may discard an outlying
/// worst cost from one CPU (e.g. Atom).
///
/// Some examples of other technologies/CPUs:
///   SSE 3   - Pentium4 / Athlon64
///   SSE 4.1 - Penryn
///   SSE 4.2 - Nehalem / Silvermont
///   AVX     - Sandy Bridge / Jaguar / Bulldozer
///   AVX2    - Haswell / Ryzen
///   AVX-512 - Xeon Phi / Skylake
///
/// And some examples of instruction target dependent costs (latency)
///                   divss     sqrtss          rsqrtss
///   AMD K7          11-16     19              3
///   Piledriver      9-24      13-15           5
///   Jaguar          14        16              2
///   Pentium II,III  18        30              2
///   Nehalem         7-14      7-18            3
///   Haswell         10-13     11              5
///
/// Interpreting the 4 TargetCostKind types:
/// TCK_RecipThroughput and TCK_Latency should try to match the worst case
/// values reported by the CPU scheduler models (and llvm-mca).
/// TCK_CodeSize should match the instruction count (e.g. divss = 1), NOT the
/// actual encoding size of the instruction.
/// TCK_SizeAndLatency should match the worst case micro-op counts reported by
/// the CPU scheduler models (and llvm-mca), to ensure that they are compatible
/// with the MicroOpBufferSize and LoopMicroOpBufferSize values, which are
/// often used as the cost thresholds where TCK_SizeAndLatency is requested.
/// (An illustrative cost-table sketch follows the CostKindTblEntry
/// declarations below.)
//===----------------------------------------------------------------------===//

#include "X86TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "x86tti"

//===----------------------------------------------------------------------===//
//
// X86 cost model.
//
//===----------------------------------------------------------------------===//

// Helper struct to store/access costs for each cost kind.
// TODO: Move this to allow other targets to use it?
struct CostKindCosts {
  unsigned RecipThroughputCost = ~0U;
  unsigned LatencyCost = ~0U;
  unsigned CodeSizeCost = ~0U;
  unsigned SizeAndLatencyCost = ~0U;
};
using CostKindTblEntry = CostTblEntryT<CostKindCosts>;
using TypeConversionCostKindTblEntry =
    TypeConversionCostTblEntryT<CostKindCosts>;
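
// An illustrative sketch (not an in-tree table) of how these entries are
// typically used: each row pairs an ISD opcode and an MVT with one value per
// cost kind, and lookups are guarded by the subtarget feature the numbers
// were measured against (e.g. hasSSE42() selecting Nehalem-era numbers). The
// table name, locals, and cost values below are hypothetical:
//
//   static const CostKindTblEntry ExampleSSE42CostTbl[] = {
//     // Opcode   Type        { RecipTput, Latency, CodeSize, SizeAndLat }
//     { ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } },
//   };
//   if (ST->hasSSE42())
//     if (const auto *Entry =
//             CostTableLookup(ExampleSSE42CostTbl, ISD, LT.second))
//       return LT.first * Entry->Cost.LatencyCost; // or the requested kind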

TargetTransformInfo::PopcntSupportKind
X86TTIImpl::getPopcntSupport(unsigned TyWidth) {}

std::optional<unsigned> X86TTIImpl::getCacheSize(
  TargetTransformInfo::CacheLevel Level) const {}

std::optional<unsigned> X86TTIImpl::getCacheAssociativity(
  TargetTransformInfo::CacheLevel Level) const {}

unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {}

bool X86TTIImpl::hasConditionalLoadStoreForType(Type *Ty) const {}

TypeSize
X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {}

unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {}

unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {}

InstructionCost X86TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {}

InstructionCost
X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                            unsigned Opcode1, const SmallBitVector &OpcodeMask,
                            TTI::TargetCostKind CostKind) const {}

InstructionCost X86TTIImpl::getShuffleCost(
    TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef<int> Mask,
    TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {}

InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {}

InstructionCost X86TTIImpl::getCmpSelInstrCost(
    unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
    TTI::OperandValueInfo Op2Info, const Instruction *I) {}

unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const {}

InstructionCost
X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                               TTI::TargetCostKind CostKind,
                                               unsigned Index, Value *Op0,
                                               Value *Op1) {}

InstructionCost
X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                     bool Insert, bool Extract,
                                     TTI::TargetCostKind CostKind) {}

InstructionCost
X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                      int VF, const APInt &DemandedDstElts,
                                      TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            TTI::OperandValueInfo OpInfo,
                                            const Instruction *I) {}

InstructionCost
X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind) {}

InstructionCost
X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                 const Value *Base,
                                 const TTI::PointersChainInfo &Info,
                                 Type *AccessTy, TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
                                                      ScalarEvolution *SE,
                                                      const SCEV *Ptr) {}

InstructionCost
X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                       std::optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty,
                                          TTI::TargetCostKind CostKind,
                                          FastMathFlags FMF) {}

InstructionCost
X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy,
                                   FastMathFlags FMF,
                                   TTI::TargetCostKind CostKind) {}

/// Calculate the cost of materializing a 64-bit value. This helper method
/// might only calculate a fraction of a larger immediate, so it is valid for
/// it to return a cost of ZERO.
InstructionCost X86TTIImpl::getIntImmCost(int64_t Val) {}
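
// A minimal sketch of the usual x86 policy (assuming the standard TCC_*
// buckets; the in-tree logic may differ): zero is free to materialize, a
// value representable as a sign-extended 32-bit immediate costs one basic
// instruction, and anything wider needs a MOVABS-class instruction:
//
//   if (Val == 0)
//     return TTI::TCC_Free;    // no immediate needed
//   if (isInt<32>(Val))
//     return TTI::TCC_Basic;   // mov r64, simm32
//   return 2 * TTI::TCC_Basic; // movabs r64, imm64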

InstructionCost X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TTI::TargetCostKind CostKind,
                                              Instruction *Inst) {}

InstructionCost X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind) {}

InstructionCost X86TTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {}

int X86TTIImpl::getGatherOverhead() const {}

int X86TTIImpl::getScatterOverhead() const {}

// Return an average cost of a Gather / Scatter instruction; this may be
// refined later.
InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode,
                                            TTI::TargetCostKind CostKind,
                                            Type *SrcVTy, const Value *Ptr,
                                            Align Alignment,
                                            unsigned AddressSpace) {}

/// Calculate the cost of Gather / Scatter operation
InstructionCost X86TTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *SrcVTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind,
    const Instruction *I) {}
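
// For reference, the operation being priced is the IR-level masked gather
// (the types below are illustrative):
//
//   %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(
//            <8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x i32> %passthru)
//
// which AVX2/AVX-512 targets can lower to a vpgatherdd-style instruction,
// while older targets must scalarize into per-lane loads.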

bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                               const TargetTransformInfo::LSRCost &C2) {}

bool X86TTIImpl::canMacroFuseCmp() {}

bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {}

bool X86TTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment) {}

bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) {}

bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {}

bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
                                      ElementCount NumElements) const {}

bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) {}

bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {}

bool X86TTIImpl::supportsGather() const {}

bool X86TTIImpl::forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {}

bool X86TTIImpl::isLegalMaskedGatherScatter(Type *DataTy, Align Alignment) {}

bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {}

bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                                 unsigned Opcode1,
                                 const SmallBitVector &OpcodeMask) const {}

bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) {}

bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {}

bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction *I) {}

bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {}

bool X86TTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {}

bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
                                       const Function *Callee,
                                       const ArrayRef<Type *> &Types) const {}

X86TTIImpl::TTI::MemCmpExpansionOptions
X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {}

bool X86TTIImpl::prefersVectorizedAddressing() const {}

bool X86TTIImpl::supportsEfficientVectorElementLoadStore() const {}

bool X86TTIImpl::enableInterleavedAccessVectorization() {}

// Get an estimate for interleaved load/store operations and strided loads.
// \p Indices contains the indices for a strided load.
// \p Factor is the interleaving factor.
// AVX-512 provides 3-src shuffles that significantly reduce the cost.
InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
    unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
    ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {}

InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *BaseTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {}

InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                 StackOffset BaseOffset,
                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {}

InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {}