#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <deque>
#include <sstream>
#include <unordered_map>
#include <vector>
usingnamespacellvm;
usingnamespacellvm::memprof;
#define DEBUG_TYPE …
STATISTIC(FunctionClonesAnalysis,
"Number of function clones created during whole program analysis");
STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
"during whole program analysis");
STATISTIC(AllocTypeNotColdThinBackend,
"Number of not cold static allocations (possibly cloned) during "
"ThinLTO backend");
STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
"(possibly cloned) during ThinLTO backend");
STATISTIC(OrigAllocsThinBackend,
"Number of original (not cloned) allocations with memprof profiles "
"during ThinLTO backend");
STATISTIC(
AllocVersionsThinBackend,
"Number of allocation versions (including clones) during ThinLTO backend");
STATISTIC(MaxAllocVersionsThinBackend,
"Maximum number of allocation versions created for an original "
"allocation during ThinLTO backend");
STATISTIC(UnclonableAllocsThinBackend,
"Number of unclonable ambigous allocations during ThinLTO backend");
STATISTIC(RemovedEdgesWithMismatchedCallees,
"Number of edges removed due to mismatched callees (profiled vs IR)");
STATISTIC(FoundProfiledCalleeCount,
"Number of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeDepth,
"Aggregate depth of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeMaxDepth,
"Maximum depth of profiled callees found via tail calls");
STATISTIC(FoundProfiledCalleeNonUniquelyCount,
"Number of profiled callees found via multiple tail call chains");
static cl::opt<std::string> DotFilePathPrefix(
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
cl::value_desc("filename"),
cl::desc("Specify the path prefix of the MemProf dot files."));
static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
cl::Hidden,
cl::desc("Export graph to dot files."));
static cl::opt<bool>
DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
cl::desc("Dump CallingContextGraph to stdout after each stage."));
static cl::opt<bool>
VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
cl::desc("Perform verification checks on CallingContextGraph."));
static cl::opt<bool>
VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
cl::desc("Perform frequent verification checks on nodes."));
static cl::opt<std::string> MemProfImportSummary(
"memprof-import-summary",
cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
cl::Hidden);
static cl::opt<unsigned>
TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
cl::Hidden,
cl::desc("Max depth to recursively search for missing "
"frames through tail calls."));
namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
cl::opt<bool> SupportsHotColdNew(
"supports-hot-cold-new", cl::init(false), cl::Hidden,
cl::desc("Linking with hot/cold operator new interfaces"));
}
extern cl::opt<bool> MemProfReportHintedSizes;
namespace {
template <typename DerivedCCG, typename FuncTy, typename CallTy>
class CallsiteContextGraph { … };
ContextNode;
ContextEdge;
FuncInfo;
CallInfo;
class ModuleCallsiteContextGraph
: public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
Instruction *> { … };
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { … };
class IndexCallsiteContextGraph
: public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
IndexCall> { … };
}
namespace llvm {
template <>
struct DenseMapInfo<typename CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> { … };
template <>
struct DenseMapInfo<typename CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> { … };
template <>
struct DenseMapInfo<IndexCall>
: public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> { … };
}
namespace {
struct FieldSeparator { … };
raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { … }
AllocationType allocTypeToUse(uint8_t AllocTypes) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatch(
const std::vector<uint8_t> &InAllocTypes,
const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
&Edges) { … }
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
const CallInfo &C) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
const CallInfo &C) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
uint64_t StackId) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
unsigned int ContextId) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
findEdgeFromCallee(const ContextNode *Callee) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
findEdgeFromCaller(const ContextNode *Caller) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
eraseCalleeEdge(const ContextEdge *Edge) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
eraseCallerEdge(const ContextEdge *Edge) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
DenseSet<uint32_t> &ContextIds) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
CallInfo Call, const FuncTy *F) { … }
static std::string getAllocTypeString(uint8_t AllocTypes) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
uint64_t TotalSize) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
DenseSet<uint32_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
const DenseSet<uint32_t> &StackSequenceContextIds,
DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
propagateDuplicateContextIds(
const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
DenseSet<uint32_t> RemainingContextIds) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkEdge(
const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
bool CheckEdges = true) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>>
&StackIdToMatchingCalls,
DenseMap<CallInfo, CallInfo> &CallToMatchingCall) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { … }
uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { … }
uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { … }
static const std::string MemProfCloneSuffix = …;
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { … }
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
const Instruction *Call,
unsigned CloneNo) const { … }
std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
const IndexCall &Call,
unsigned CloneNo) const { … }
std::vector<uint64_t>
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
Instruction *Call) { … }
std::vector<uint64_t>
IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
std::vector<uint64_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
CallStack<NodeT, IteratorT> &CallsiteContext) { … }
ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
Module &M,
llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
: … { … }
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
ModuleSummaryIndex &Index,
llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing)
: … { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
CallTy>::handleCallsitesWithMultipleTargets() { … }
uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { … }
uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
CallTy Call, EdgeIter &EI,
MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) { … }
bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
bool &FoundMultipleCalleeChains) { … }
bool ModuleCallsiteContextGraph::calleeMatchesFunc(
Instruction *Call, const Function *Func, const Function *CallerFunc,
std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) { … }
bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
Instruction *Call2) { … }
bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
bool &FoundMultipleCalleeChains) { … }
bool IndexCallsiteContextGraph::calleeMatchesFunc(
IndexCall &Call, const FunctionSummary *Func,
const FunctionSummary *CallerFunc,
std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) { … }
bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
raw_ostream &OS) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
raw_ostream &OS) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
raw_ostream &OS) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
raw_ostream &OS) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { … }
GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
std::string Label) const { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
DenseSet<uint32_t> ContextIdsToMove) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
ContextNode *NewCallee, EdgeIter *CallerEdgeI,
bool NewClone,
DenseSet<uint32_t> ContextIdsToMove) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
recursivelyRemoveNoneTypeCalleeEdges(
ContextNode *Node, DenseSet<const ContextNode *> &Visited) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() { … }
bool checkColdOrNotCold(uint8_t AllocType) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
const DenseSet<uint32_t> &AllocContextIds) { … }
void ModuleCallsiteContextGraph::updateAllocationCall(
CallInfo &Call, AllocationType AllocType) { … }
void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
AllocationType AllocType) { … }
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
FuncInfo CalleeFunc) { … }
void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
FuncInfo CalleeFunc) { … }
CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
Instruction *>::FuncInfo
ModuleCallsiteContextGraph::cloneFunctionForCallsite(
FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { … }
CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
IndexCall>::FuncInfo
IndexCallsiteContextGraph::cloneFunctionForCallsite(
FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { … }
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
&FuncToAliasMap) { … }
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
const ModuleSummaryIndex *ImportSummary) { … }
bool MemProfContextDisambiguation::applyImport(Module &M) { … }
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { … }
bool MemProfContextDisambiguation::processModule(
Module &M,
llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { … }
MemProfContextDisambiguation::MemProfContextDisambiguation(
const ModuleSummaryIndex *Summary)
: … { … }
PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
ModuleAnalysisManager &AM) { … }
void MemProfContextDisambiguation::run(
ModuleSummaryIndex &Index,
llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) { … }