#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
#include "AArch64PerfectShuffle.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <optional>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE …
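// Workaround for the Falkor hardware prefetcher: see
// getFalkorUnrollingPreferences() below, which caps runtime unrolling so the
// number of strided loads in a loop stays within what the prefetcher can
// track.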
static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
cl::init(true), cl::Hidden);
static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
cl::Hidden);
static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
cl::init(10), cl::Hidden);
static cl::opt<unsigned> SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold",
cl::init(15), cl::Hidden);
static cl::opt<unsigned>
NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10),
cl::Hidden);
static cl::opt<unsigned> CallPenaltyChangeSM(
"call-penalty-sm-change", cl::init(5), cl::Hidden,
cl::desc(
"Penalty of calling a function that requires a change to PSTATE.SM"));
static cl::opt<unsigned> InlineCallPenaltyChangeSM(
"inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
cl::init(true), cl::Hidden);
static cl::opt<unsigned>
BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
cl::desc("The cost of a histcnt instruction"));
namespace {
class TailFoldingOption { … };
} // namespace
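// External storage for -sve-tail-folding; the option writes here through
// cl::location() so the rest of the TTI can query the parsed settings.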
TailFoldingOption TailFoldingOptionLoc;
cl::opt<TailFoldingOption, true, cl::parser<std::string>> SVETailFolding(
"sve-tail-folding",
cl::desc(
"Control the use of vectorisation using tail-folding for SVE where the"
" option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
"\ndisabled (Initial) No loop types will vectorize using "
"tail-folding"
"\ndefault (Initial) Uses the default tail-folding settings for "
"the target CPU"
"\nall (Initial) All legal loop types will vectorize using "
"tail-folding"
"\nsimple (Initial) Use tail-folding for simple loops (not "
"reductions or recurrences)"
"\nreductions Use tail-folding for loops containing reductions"
"\nnoreductions Inverse of above"
"\nrecurrences Use tail-folding for loops containing fixed order "
"recurrences"
"\nnorecurrences Inverse of above"
"\nreverse Use tail-folding for loops requiring reversed "
"predicates"
"\nnoreverse Inverse of above"),
cl::location(TailFoldingOptionLoc));
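// Auto-vectorisation while in SME streaming mode is conservatively off by
// default; these flags re-enable fixed-width and scalable vectorisation there
// for experimentation.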
static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
"enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
static cl::opt<bool> EnableScalableAutovecInStreamingMode(
"enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
static bool isSMEABIRoutineCall(const CallInst &CI) { … }
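// Conservatively returns true if F contains operations (such as calls to SME
// ABI routines) that make it unsafe to inline F into a caller with different
// streaming-mode or ZA state attributes.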
static bool hasPossibleIncompatibleOps(const Function *F) { … }
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const { … }
bool AArch64TTIImpl::areTypesABICompatible(
const Function *Caller, const Function *Callee,
const ArrayRef<Type *> &Types) const { … }
unsigned
AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
unsigned DefaultCallPenalty) const { … }
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const { … }
InstructionCost AArch64TTIImpl::getIntImmCost(int64_t Val) { … }
InstructionCost AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) { … }
InstructionCost AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst) { … }
InstructionCost
AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) { … }
TargetTransformInfo::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { … }
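// Returns true for scalable vector types whose known minimum size is smaller
// than a full 128-bit SVE register, i.e. "unpacked" SVE layouts.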
static bool isUnpackedVectorVT(EVT VecVT) { … }
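// Cost model for the experimental histogram intrinsic, built on the
// BaseHistCntCost option above; assumed to scale with the legalized vector
// type.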
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { … }
InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) { … }
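// The combines below fold away redundant SVE predicate conversions: a
// convert.from.svbool fed (possibly through PHIs or predicated binary ops) by
// a convert.to.svbool of the same type can reuse the original predicate.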
static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *>
tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) { … }
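// Returns true if Pred is known to be an all-active SVE predicate, e.g. a
// ptrue with the "all" pattern.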
static bool isAllActivePredicate(Value *Pred) { … }
static std::optional<Instruction *>
instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
bool hasInactiveVector) { … }
static std::optional<Instruction *>
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
int PredPos) { … }
static std::optional<Instruction *>
instCombineSVENoActiveZero(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) { … }
static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
IntrinsicInst &II) { … }
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
bool MergeIntoAddendOp) { … }
static std::optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { … }
static std::optional<Instruction *>
instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { … }
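// Maps a predicated SVE arithmetic intrinsic to the equivalent IR binary
// opcode (BinaryOpsEnd when there is none), so all-active operations can be
// rewritten as plain IR instructions.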
static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) { … }
static std::optional<Instruction *>
instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
Intrinsic::ID IID) { … }
static std::optional<Instruction *>
instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
Intrinsic::ID IID) { … }
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IntrinsicInst &II,
Intrinsic::ID IID) { … }
static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEUzp1(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *>
instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
IntrinsicInst &II) { … }
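// Helper for instCombineSVEDupqLane: checks whether the collected
// insertelement operands form a repeating (splattable) pattern, optionally
// treating poison elements as wildcards.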
static bool SimplifyValuePattern(SmallVector<Value *> &Vec, bool AllowPoison) { … }
static std::optional<Instruction *> instCombineSVEDupqLane(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
IntrinsicInst &II) { … }
static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
IntrinsicInst &II) { … }
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const { … }
std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const { … }
bool AArch64TTIImpl::enableScalableVectorization() const { … }
TypeSize
AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { … }
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args,
Type *SrcOverrideTy) { … }
bool AArch64TTIImpl::isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
Type *Src) { … }
InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) { … }
InstructionCost AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode,
Type *Dst,
VectorType *VecTy,
unsigned Index) { … }
InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind,
const Instruction *I) { … }
InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
Type *Val,
unsigned Index,
bool HasRealUse) { … }
InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) { … }
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) { … }
InstructionCost AArch64TTIImpl::getScalarizationOverhead(
VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
TTI::TargetCostKind CostKind) { … }
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
ArrayRef<const Value *> Args,
const Instruction *CxtI) { … }
InstructionCost AArch64TTIImpl::getAddressComputationCost(Type *Ty,
ScalarEvolution *SE,
const SCEV *Ptr) { … }
InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) { … }
AArch64TTIImpl::TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { … }
bool AArch64TTIImpl::prefersVectorizedAddressing() const { … }
InstructionCost
AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) { … }
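// Selects the SVEGatherOverhead or SVEScatterOverhead tuning value above,
// depending on whether the opcode is a load (gather) or store (scatter).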
static unsigned getSVEGatherScatterOverhead(unsigned Opcode,
const AArch64Subtarget *ST) { … }
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { … }
bool AArch64TTIImpl::useNeonVector(const Type *Ty) const { … }
InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
TTI::OperandValueInfo OpInfo,
const Instruction *I) { … }
InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) { … }
InstructionCost
AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { … }
unsigned AArch64TTIImpl::getMaxInterleaveFactor(ElementCount VF) { … }
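// Falkor's hardware prefetcher tracks a limited number of strided accesses,
// so cap the runtime unroll count when a loop contains several strided loads
// (guarded by -enable-falkor-hwpf-unroll-fix).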
static void
getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TargetTransformInfo::UnrollingPreferences &UP) { … }
void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) { … }
void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) { … }
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) { … }
bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) { … }
bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) { … }
bool AArch64TTIImpl::isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc, ElementCount VF) const { … }
InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
FastMathFlags FMF,
TTI::TargetCostKind CostKind) { … }
InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) { … }
InstructionCost
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) { … }
InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { … }
InstructionCost AArch64TTIImpl::getShuffleCost(
TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) { … }
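// Scans the loop for pointers that decrease across iterations; such accesses
// need reversed predicates under tail-folding, which is gated by the
// "reverse" bit of -sve-tail-folding.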
static bool containsDecreasingPointers(Loop *TheLoop,
PredicatedScalarEvolution *PSE) { … }
bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { … }
InstructionCost
AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
StackOffset BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) const { … }
bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) { … }
bool AArch64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) { … }