//===- Mem2Reg.cpp - Promotes memory slots into values ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "mlir/Transforms/Mem2Reg.h" #include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Analysis/TopologicalSortUtils.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/RegionKindInterface.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/MemorySlotInterfaces.h" #include "mlir/Transforms/Passes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/GenericIteratedDominanceFrontier.h" namespace mlir { #define GEN_PASS_DEF_MEM2REG #include "mlir/Transforms/Passes.h.inc" } // namespace mlir #define DEBUG_TYPE … usingnamespacemlir; /// mem2reg /// /// This pass turns unnecessary uses of automatically allocated memory slots /// into direct Value-based operations. For example, it will simplify storing a /// constant in a memory slot to immediately load it to a direct use of that /// constant. In other words, given a memory slot addressed by a non-aliased /// "pointer" Value, mem2reg removes all the uses of that pointer. /// /// Within a block, this is done by following the chain of stores and loads of /// the slot and replacing the results of loads with the values previously /// stored. If a load happens before any other store, a poison value is used /// instead. /// /// Control flow can create situations where a load could be replaced by /// multiple possible stores depending on the control flow path taken. As a /// result, this pass must introduce new block arguments in some blocks to /// accommodate for the multiple possible definitions. Each predecessor will /// populate the block argument with the definition reached at its end. With /// this, the value stored can be well defined at block boundaries, allowing /// the propagation of replacement through blocks. /// /// This pass computes this transformation in four main steps. The two first /// steps are performed during an analysis phase that does not mutate IR. /// /// The two steps of the analysis phase are the following: /// - A first step computes the list of operations that transitively use the /// memory slot we would like to promote. The purpose of this phase is to /// identify which uses must be removed to promote the slot, either by rewiring /// the user or deleting it. Naturally, direct uses of the slot must be removed. /// Sometimes additional uses must also be removed: this is notably the case /// when a direct user of the slot cannot rewire its use and must delete itself, /// and thus must make its users no longer use it. If any of those uses cannot /// be removed by their users in any way, promotion cannot continue: this is /// decided at this step. /// - A second step computes the list of blocks where a block argument will be /// needed ("merge points") without mutating the IR. These blocks are the blocks /// leading to a definition clash between two predecessors. Such blocks happen /// to be the Iterated Dominance Frontier (IDF) of the set of blocks containing /// a store, as they represent the point where a clear defining dominator stops /// existing. Computing this information in advance allows making sure the /// terminators that will forward values are capable of doing so (inability to /// do so aborts promotion at this step). /// /// At this point, promotion is guaranteed to happen, and the mutation phase can /// begin with the following steps: /// - A third step computes the reaching definition of the memory slot at each /// blocking user. This is the core of the mem2reg algorithm, also known as /// load-store forwarding. This analyses loads and stores and propagates which /// value must be stored in the slot at each blocking user. This is achieved by /// doing a depth-first walk of the dominator tree of the function. This is /// sufficient because the reaching definition at the beginning of a block is /// either its new block argument if it is a merge block, or the definition /// reaching the end of its immediate dominator (parent in the dominator tree). /// We can therefore propagate this information down the dominator tree to /// proceed with renaming within blocks. /// - The final fourth step uses the reaching definition to remove blocking uses /// in topological order. /// /// For further reading, chapter three of SSA-based Compiler Design [1] /// showcases SSA construction, where mem2reg is an adaptation of the same /// process. /// /// [1]: Rastello F. & Bouchez Tichadou F., SSA-based Compiler Design (2022), /// Springer. namespace { BlockingUsesMap; /// Information computed during promotion analysis used to perform actual /// promotion. struct MemorySlotPromotionInfo { … }; /// Computes information for basic slot promotion. This will check that direct /// slot promotion can be performed, and provide the information to execute the /// promotion. This does not mutate IR. class MemorySlotPromotionAnalyzer { … }; BlockIndexCache; /// The MemorySlotPromoter handles the state of promoting a memory slot. It /// wraps a slot and its associated allocator. This will perform the mutation of /// IR. class MemorySlotPromoter { … }; } // namespace MemorySlotPromoter::MemorySlotPromoter( MemorySlot slot, PromotableAllocationOpInterface allocator, OpBuilder &builder, DominanceInfo &dominance, const DataLayout &dataLayout, MemorySlotPromotionInfo info, const Mem2RegStatistics &statistics, BlockIndexCache &blockIndexCache) : … { … } Value MemorySlotPromoter::getOrCreateDefaultValue() { … } LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses( BlockingUsesMap &userToBlockingUses) { … } SmallPtrSet<Block *, 16> MemorySlotPromotionAnalyzer::computeSlotLiveIn( SmallPtrSetImpl<Block *> &definingBlocks) { … } IDFCalculator; void MemorySlotPromotionAnalyzer::computeMergePoints( SmallPtrSetImpl<Block *> &mergePoints) { … } bool MemorySlotPromotionAnalyzer::areMergePointsUsable( SmallPtrSetImpl<Block *> &mergePoints) { … } std::optional<MemorySlotPromotionInfo> MemorySlotPromotionAnalyzer::computeInfo() { … } Value MemorySlotPromoter::computeReachingDefInBlock(Block *block, Value reachingDef) { … } void MemorySlotPromoter::computeReachingDefInRegion(Region *region, Value reachingDef) { … } /// Gets or creates a block index mapping for `region`. static const DenseMap<Block *, size_t> & getOrCreateBlockIndices(BlockIndexCache &blockIndexCache, Region *region) { … } /// Sorts `ops` according to dominance. Relies on the topological order of basic /// blocks to get a deterministic ordering. Uses `blockIndexCache` to avoid the /// potentially expensive recomputation of a block index map. static void dominanceSort(SmallVector<Operation *> &ops, Region ®ion, BlockIndexCache &blockIndexCache) { … } void MemorySlotPromoter::removeBlockingUses() { … } std::optional<PromotableAllocationOpInterface> MemorySlotPromoter::promoteSlot() { … } LogicalResult mlir::tryToPromoteMemorySlots( ArrayRef<PromotableAllocationOpInterface> allocators, OpBuilder &builder, const DataLayout &dataLayout, DominanceInfo &dominance, Mem2RegStatistics statistics) { … } namespace { struct Mem2Reg : impl::Mem2RegBase<Mem2Reg> { … }; } // namespace