////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This file provides the interface for the sampled PGO profile loader base /// implementation. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" namespace llvm { usingnamespacesampleprof; usingnamespacesampleprofutil; ProfileCount; namespace vfs { class FileSystem; } // namespace vfs #define DEBUG_TYPE … namespace afdo_detail { template <typename BlockT> struct IRTraits; template <> struct IRTraits<BasicBlock> { … }; } // end namespace afdo_detail // This class serves sample counts correlation for SampleProfileLoader by // analyzing pseudo probes and their function descriptors injected by // SampleProfileProber. class PseudoProbeManager { … }; extern cl::opt<bool> SampleProfileUseProfi; static inline bool skipProfileForFunction(const Function &F) { … } static inline void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector<Function *> &FunctionOrderList) { … } template <typename FT> class SampleProfileLoaderBaseImpl { … }; /// Clear all the per-function data used to load samples and propagate weights. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) { … } #ifndef NDEBUG /// Print the weight of edge \p E on stream \p OS. /// /// \param OS Stream to emit the output to. /// \param E Edge to print. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::printEdgeWeight(raw_ostream &OS, Edge E) { OS << "weight[" << E.first->getName() << "->" << E.second->getName() << "]: " << EdgeWeights[E] << "\n"; } /// Print the equivalence class of block \p BB on stream \p OS. /// /// \param OS Stream to emit the output to. /// \param BB Block to print. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::printBlockEquivalence( raw_ostream &OS, const BasicBlockT *BB) { const BasicBlockT *Equiv = EquivalenceClass[BB]; OS << "equivalence[" << BB->getName() << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; } /// Print the weight of block \p BB on stream \p OS. /// /// \param OS Stream to emit the output to. /// \param BB Block to print. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::printBlockWeight( raw_ostream &OS, const BasicBlockT *BB) const { const auto &I = BlockWeights.find(BB); uint64_t W = (I == BlockWeights.end() ? 0 : I->second); OS << "weight[" << BB->getName() << "]: " << W << "\n"; } #endif /// Get the weight for an instruction. /// /// The "weight" of an instruction \p Inst is the number of samples /// collected on that instruction at runtime. To retrieve it, we /// need to compute the line number of \p Inst relative to the start of its /// function. We use HeaderLineno to compute the offset. We then /// look up the samples collected for \p Inst using BodySamples. /// /// \param Inst Instruction to query. /// /// \returns the weight of \p Inst. template <typename BT> ErrorOr<uint64_t> SampleProfileLoaderBaseImpl<BT>::getInstWeight(const InstructionT &Inst) { … } template <typename BT> ErrorOr<uint64_t> SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) { … } template <typename BT> ErrorOr<uint64_t> SampleProfileLoaderBaseImpl<BT>::getProbeWeight(const InstructionT &Inst) { … } /// Compute the weight of a basic block. /// /// The weight of basic block \p BB is the maximum weight of all the /// instructions in BB. /// /// \param BB The basic block to query. /// /// \returns the weight for \p BB. template <typename BT> ErrorOr<uint64_t> SampleProfileLoaderBaseImpl<BT>::getBlockWeight(const BasicBlockT *BB) { … } /// Compute and store the weights of every basic block. /// /// This populates the BlockWeights map by computing /// the weights of every basic block in the CFG. /// /// \param F The function to query. template <typename BT> bool SampleProfileLoaderBaseImpl<BT>::computeBlockWeights(FunctionT &F) { … } /// Get the FunctionSamples for an instruction. /// /// The FunctionSamples of an instruction \p Inst is the inlined instance /// in which that instruction is coming from. We traverse the inline stack /// of that instruction, and match it with the tree nodes in the profile. /// /// \param Inst Instruction to query. /// /// \returns the FunctionSamples pointer to the inlined instance. template <typename BT> const FunctionSamples *SampleProfileLoaderBaseImpl<BT>::findFunctionSamples( const InstructionT &Inst) const { … } /// Find equivalence classes for the given block. /// /// This finds all the blocks that are guaranteed to execute the same /// number of times as \p BB1. To do this, it traverses all the /// descendants of \p BB1 in the dominator or post-dominator tree. /// /// A block BB2 will be in the same equivalence class as \p BB1 if /// the following holds: /// /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2 /// is a descendant of \p BB1 in the dominator tree, then BB2 should /// dominate BB1 in the post-dominator tree. /// /// 2- Both BB2 and \p BB1 must be in the same loop. /// /// For every block BB2 that meets those two requirements, we set BB2's /// equivalence class to \p BB1. /// /// \param BB1 Block to check. /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree. /// \param DomTree Opposite dominator tree. If \p Descendants is filled /// with blocks from \p BB1's dominator tree, then /// this is the post-dominator tree, and vice versa. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::findEquivalencesFor( BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants, PostDominatorTreeT *DomTree) { … } /// Find equivalence classes. /// /// Since samples may be missing from blocks, we can fill in the gaps by setting /// the weights of all the blocks in the same equivalence class to the same /// weight. To compute the concept of equivalence, we use dominance and loop /// information. Two blocks B1 and B2 are in the same equivalence class if B1 /// dominates B2, B2 post-dominates B1 and both are in the same loop. /// /// \param F The function to query. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) { … } /// Visit the given edge to decide if it has a valid weight. /// /// If \p E has not been visited before, we copy to \p UnknownEdge /// and increment the count of unknown edges. /// /// \param E Edge to visit. /// \param NumUnknownEdges Current number of unknown edges. /// \param UnknownEdge Set if E has not been visited before. /// /// \returns E's weight, if known. Otherwise, return 0. template <typename BT> uint64_t SampleProfileLoaderBaseImpl<BT>::visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge) { … } /// Propagate weights through incoming/outgoing edges. /// /// If the weight of a basic block is known, and there is only one edge /// with an unknown weight, we can calculate the weight of that edge. /// /// Similarly, if all the edges have a known count, we can calculate the /// count of the basic block, if needed. /// /// \param F Function to process. /// \param UpdateBlockCount Whether we should update basic block counts that /// has already been annotated. /// /// \returns True if new weights were assigned to edges or blocks. template <typename BT> bool SampleProfileLoaderBaseImpl<BT>::propagateThroughEdges( FunctionT &F, bool UpdateBlockCount) { … } /// Build in/out edge lists for each basic block in the CFG. /// /// We are interested in unique edges. If a block B1 has multiple /// edges to another block B2, we only add a single B1->B2 edge. template <typename BT> void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) { … } /// Propagate weights into edges /// /// The following rules are applied to every block BB in the CFG: /// /// - If BB has a single predecessor/successor, then the weight /// of that edge is the weight of the block. /// /// - If all incoming or outgoing edges are known except one, and the /// weight of the block is already known, the weight of the unknown /// edge will be the weight of the block minus the sum of all the known /// edges. If the sum of all the known edges is larger than BB's weight, /// we set the unknown edge weight to zero. /// /// - If there is a self-referential edge, and the weight of the block is /// known, the weight for that edge is set to the weight of the block /// minus the weight of the other incoming edges to that block (if /// known). template <typename BT> void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) { … } template <typename FT> void SampleProfileLoaderBaseImpl<FT>::applyProfi( FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) { … } /// Generate branch weight metadata for all branches in \p F. /// /// Branch weights are computed out of instruction samples using a /// propagation heuristic. Propagation proceeds in 3 phases: /// /// 1- Assignment of block weights. All the basic blocks in the function /// are initial assigned the same weight as their most frequently /// executed instruction. /// /// 2- Creation of equivalence classes. Since samples may be missing from /// blocks, we can fill in the gaps by setting the weights of all the /// blocks in the same equivalence class to the same weight. To compute /// the concept of equivalence, we use dominance and loop information. /// Two blocks B1 and B2 are in the same equivalence class if B1 /// dominates B2, B2 post-dominates B1 and both are in the same loop. /// /// 3- Propagation of block weights into edges. This uses a simple /// propagation heuristic. The following rules are applied to every /// block BB in the CFG: /// /// - If BB has a single predecessor/successor, then the weight /// of that edge is the weight of the block. /// /// - If all the edges are known except one, and the weight of the /// block is already known, the weight of the unknown edge will /// be the weight of the block minus the sum of all the known /// edges. If the sum of all the known edges is larger than BB's weight, /// we set the unknown edge weight to zero. /// /// - If there is a self-referential edge, and the weight of the block is /// known, the weight for that edge is set to the weight of the block /// minus the weight of the other incoming edges to that block (if /// known). /// /// Since this propagation is not guaranteed to finalize for every CFG, we /// only allow it to proceed for a limited number of iterations (controlled /// by -sample-profile-max-propagate-iterations). /// /// FIXME: Try to replace this propagation heuristic with a scheme /// that is guaranteed to finalize. A work-list approach similar to /// the standard value propagation algorithm used by SSA-CCP might /// work here. /// /// \param F The function to query. /// /// \returns true if \p F was modified. Returns false, otherwise. template <typename BT> bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights( FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { … } template <typename BT> void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation( FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { … } template <typename BT> void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation( FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { … } template <typename BT> void SampleProfileLoaderBaseImpl<BT>::emitCoverageRemarks(FunctionT &F) { … } /// Get the line number for the function header. /// /// This looks up function \p F in the current compilation unit and /// retrieves the line number where the function is defined. This is /// line 0 for all the samples read from the profile file. Every line /// number is relative to this line. /// /// \param F Function object to query. /// /// \returns the line number where \p F is defined. If it returns 0, /// it means that there is no debug information available for \p F. template <typename BT> unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) { … } #undef DEBUG_TYPE } // namespace llvm #endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H