#include "X86.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
usingnamespacellvm;
usingnamespacePatternMatch;
#define DEBUG_TYPE …
#ifndef NDEBUG
/// Returns true iff \p Ty is a fixed-width vector with exactly 256 elements
/// whose element type is a 32-bit integer. Only compiled when NDEBUG is not
/// defined.
static bool isV256I32Ty(Type *Ty) {
  auto *VecTy = dyn_cast<FixedVectorType>(Ty);
  // Anything that is not a fixed vector can never match.
  if (!VecTy)
    return false;
  // Check the element count first, then the element type.
  if (VecTy->getNumElements() != 256)
    return false;
  return VecTy->getElementType()->isIntegerTy(32);
}
#endif
// Hidden boolean command-line flag "-enable-x86-scalar-amx"; off by default.
// NOTE(review): presumably consulted by the pass to decide whether to run the
// scalar lowering — the use site is not visible here; confirm.
static cl::opt<bool>
    X86ScalarizeAMX("enable-x86-scalar-amx", cl::init(false), cl::Hidden,
                    cl::desc("X86: enable AMX scalarization."));
namespace {
// File-local class (anonymous namespace). Its member functions defined out of
// line later in this file are createLoop, createTileLoadStoreLoops,
// createTileDPLoops, lowerTileDP, lowerTileLoadStore, lowerTileZero and visit.
// NOTE(review): judging by the names, this presumably rewrites AMX tile
// intrinsics into explicit loops — the class body is not visible here; confirm.
class X86LowerAMXIntrinsics { … };
}
// Out-of-line definition of X86LowerAMXIntrinsics::createLoop.
// Takes a preheader block, an exit block, a bound value, a step value, a name,
// an IRBuilderBase and a Loop pointer, and returns a BasicBlock pointer.
// NOTE(review): presumably builds a counted loop between Preheader and Exit
// and returns one of the new blocks — the body is not visible here; confirm
// which block is returned and how L is used.
BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
BasicBlock *Exit, Value *Bound,
Value *Step, StringRef Name,
IRBuilderBase &B, Loop *L) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::createTileLoadStoreLoops,
// templated on the compile-time flag IsTileLoad.
// Takes start and end blocks, an IRBuilderBase, Row/Col values, a pointer and
// stride value, and a Tile value; returns a Value pointer.
// NOTE(review): presumably emits the row/column loop nest for a tile load
// (IsTileLoad == true) or tile store (false) — the body is not visible here;
// confirm what the returned Value is in each case.
template <bool IsTileLoad>
Value *X86LowerAMXIntrinsics::createTileLoadStoreLoops(
BasicBlock *Start, BasicBlock *End, IRBuilderBase &B, Value *Row,
Value *Col, Value *Ptr, Value *Stride, Value *Tile) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::createTileDPLoops.
// The enable_if_t return type restricts IntrID to the five dot-product
// intrinsics listed below (tdpbssd, tdpbsud, tdpbusd, tdpbuud, tdpbf16ps
// "_internal" variants); for those IDs the function returns a Value pointer.
// Takes start and end blocks, an IRBuilderBase, Row/Col/K values and the
// Acc, LHS and RHS operand values.
// NOTE(review): presumably emits the loop nest computing the tile dot product
// — the body is not visible here; confirm what the returned Value is.
template <Intrinsic::ID IntrID>
std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||
IntrID == Intrinsic::x86_tdpbsud_internal ||
IntrID == Intrinsic::x86_tdpbusd_internal ||
IntrID == Intrinsic::x86_tdpbuud_internal ||
IntrID == Intrinsic::x86_tdpbf16ps_internal,
Value *>
X86LowerAMXIntrinsics::createTileDPLoops(BasicBlock *Start, BasicBlock *End,
IRBuilderBase &B, Value *Row,
Value *Col, Value *K, Value *Acc,
Value *LHS, Value *RHS) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::lowerTileDP.
// The enable_if_t return type restricts IntrID to the same five dot-product
// intrinsic IDs accepted by createTileDPLoops; for those IDs the function
// takes the intrinsic call as an Instruction pointer and returns bool.
// NOTE(review): the bool presumably reports whether the IR was changed — the
// body is not visible here; confirm.
template <Intrinsic::ID IntrID>
std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||
IntrID == Intrinsic::x86_tdpbsud_internal ||
IntrID == Intrinsic::x86_tdpbusd_internal ||
IntrID == Intrinsic::x86_tdpbuud_internal ||
IntrID == Intrinsic::x86_tdpbf16ps_internal,
bool>
X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::lowerTileLoadStore,
// templated on the compile-time flag IsTileLoad. Takes the instruction to
// lower and returns bool.
// NOTE(review): presumably handles a tile load when IsTileLoad is true and a
// tile store otherwise, returning whether the IR changed — the body is not
// visible here; confirm.
template <bool IsTileLoad>
bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::lowerTileZero. Takes the
// instruction to lower and returns bool.
// NOTE(review): the bool presumably reports whether the IR was changed — the
// body is not visible here; confirm.
bool X86LowerAMXIntrinsics::lowerTileZero(Instruction *TileZero) { … }
// Out-of-line definition of X86LowerAMXIntrinsics::visit. Takes no arguments
// and returns bool.
// NOTE(review): presumably the driver that finds the AMX intrinsic calls and
// dispatches to the lower* helpers, returning whether anything changed — the
// body is not visible here; confirm.
bool X86LowerAMXIntrinsics::visit() { … }
namespace {
// File-local FunctionPass subclass (anonymous namespace). Its static ID member
// is defined, and the pass is registered through the INITIALIZE_PASS_* macros,
// further down in this file.
// NOTE(review): presumably a legacy pass-manager wrapper around
// X86LowerAMXIntrinsics — the class body is not visible here; confirm.
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass { … };
}
// Name string handed to the INITIALIZE_PASS_* macros below (initializer not
// visible in this view).
static const char PassName[] = …;
// Definition of the pass's static ID member (initializer not visible in this
// view).
char X86LowerAMXIntrinsicsLegacyPass::ID = …;
// Pass registration: DEBUG_TYPE and PassName are passed as the argument and
// name strings, followed by two `false` flags.
// NOTE(review): per LLVM's PassSupport.h the two flags are "CFG only" and
// "is analysis" — confirm against the LLVM version in use.
INITIALIZE_PASS_BEGIN(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
// Declares TargetPassConfig as a dependency of this pass.
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
// Definition of llvm::createX86LowerAMXIntrinsicsPass: takes no arguments and
// returns a FunctionPass pointer.
// NOTE(review): presumably returns a newly allocated
// X86LowerAMXIntrinsicsLegacyPass — the body is not visible here; confirm.
FunctionPass *llvm::createX86LowerAMXIntrinsicsPass() { … }