//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memsets.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE …

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
/// two ranges into [0, 4), which is memset'able.
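///
/// As an illustrative sketch (an editor's addition, not part of the original
/// comment), in C-like pseudocode the sequence above is folded from:
/// \code
///   p[1] = 0; p[0] = 0; p[3] = 0; p[2] = 0;
/// \endcode
/// into:
/// \code
///   memset(p, 0, 4);
/// \endcode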
struct MemsetRange { … };

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { … }

namespace {

class MemsetRanges { … };

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) { … }

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

// Check that V is either not accessible by the caller, or unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) { … }

void MemCpyOptPass::eraseInstruction(Instruction *I) { … }

// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
// intrinsic and store it inside SkippedLifetimeStart.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) { … }

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) { … }

// Update AA metadata.
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) { … }

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) { … }

// This method tries to lift a store instruction before position P.
// It lifts the store and its operands, plus anything else that may alias
// with them.
// It returns true if it was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { … }

bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) { … }

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { … }

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { … }

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign, BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) { … }

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) { … }
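// An illustrative sketch (editor's addition, not from the original source) of
// the call slot optimization performed by performCallSlotOptzn above, in
// C-like pseudocode:
//
//   f(&tmp);              // the call writes its result through the pointer
//   memcpy(dest, tmp, n); // the result is then copied into dest
//
// becomes
//
//   f(&dest);             // the call writes straight into the destination
//
// when dest is at least as large and as aligned as tmp, and neither is read,
// written, or captured in between in a way that would make the rewrite
// observable.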
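// An illustrative sketch (editor's addition, not from the original source) of
// the memcpy-memcpy forwarding performed by processMemCpyMemCpyDependence
// above, in C-like pseudocode:
//
//   memcpy(b, a, n);   // MDep
//   ...
//   memcpy(c, b, n);   // M
//
// becomes
//
//   memcpy(b, a, n);
//   ...
//   memcpy(c, a, n);   // M now reads from 'a', so MDep may later be removed
//
// provided neither 'a' nor 'b' is modified between the two copies.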
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) { … }

/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                             MemoryDef *Def, Value *Size) { … }

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// when dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) { … }

// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
// deleted. This pattern is generated frequently in Rust, due to the ubiquity
// of move operations in that language.
//
// Once we determine that the optimization is safe to perform, we replace all
// uses of the destination alloca with the source alloca. We also "shrink wrap"
// the lifetime markers of the single merged alloca to before the first use
// and after the last use. Note that the "shrink wrapping" procedure is a safe
// transformation only because we restrict the scope of this optimization to
// allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
                                          AllocaInst *DestAlloca,
                                          AllocaInst *SrcAlloca, TypeSize Size,
                                          BatchAAResults &BAA) { … }

static bool isZeroSize(Value *Size) { … }

/// Perform simplification of memcpys. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { … }

/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) { … }

/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { … }
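// An illustrative sketch (editor's addition, not from the original source) of
// the stack-move optimization performed by performStackMoveOptzn above, in
// C-like pseudocode:
//
//   src = alloca T
//   dst = alloca T
//   ... initialize src ...
//   memcpy(dst, src, sizeof(T))   // the "move"; src is unused afterwards
//   ... only dst is used from here on ...
//
// becomes a single alloca used throughout, with the memcpy deleted and the
// lifetime markers shrink-wrapped around the first and last uses.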
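// An illustrative sketch (editor's addition, not from the original source) of
// the byval rewrite performed by processByValArgument above, in C-like
// pseudocode:
//
//   memcpy(tmp, src, n);
//   call f(byval tmp);
//
// becomes
//
//   memcpy(tmp, src, n);   // often dead afterwards
//   call f(byval src);
//
// when 'src' is not modified between the memcpy and the call and its
// alignment satisfies (or can be raised to) the byval requirement.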
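// An illustrative sketch (editor's addition, not from the original source) of
// the rewrite performed by processImmutArgument below, under the conditions
// its comment lists, in C-like pseudocode:
//
//   val = alloca T
//   memcpy(val, src, sizeof(T));
//   call f(nocapture noalias readonly val);
//
// becomes
//
//   call f(nocapture noalias readonly src);   // the memcpy may then be removed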
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during the call. Try to use the memcpy source directly if all of the
/// following conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
///    call.
/// 2. The memcpy dst is an alloca with known alignment & size.
///    2-1. The memcpy length == the alloca size, which ensures that the new
///         pointer is dereferenceable for the required range.
///    2-2. The src pointer has an alignment >= the alloca alignment, or this
///         can be enforced.
/// 3. The memcpy dst and src are not modified between the memcpy and the call
///    (if the MSSA clobber check is safe).
/// 4. The memcpy src is not modified during the call. (The ModRef check shows
///    no Mod.)
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) { … }

/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) { … }

PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { … }

bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                            AliasAnalysis *AA_, AssumptionCache *AC_,
                            DominatorTree *DT_, PostDominatorTree *PDT_,
                            MemorySSA *MSSA_) { … }