//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memsets.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE …

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
/// two ranges into [0, 4), which is memset'able.
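///
/// As an illustrative sketch (an editor's addition, not part of the original
/// comment), in C-like pseudocode the sequence above is folded from:
/// \code
///   p[1] = 0; p[0] = 0; p[3] = 0; p[2] = 0;
/// \endcode
/// into:
/// \code
///   memset(p, 0, 4);
/// \endcode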
struct MemsetRange { … };

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { … }

namespace {

class MemsetRanges { … };

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) { … }

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

// Check that V is either not accessible by the caller, or unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) { … }

void MemCpyOptPass::eraseInstruction(Instruction *I) { … }

// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
// intrinsic and store it inside SkippedLifetimeStart.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) { … }

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) { … }

// Update AA metadata.
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) { … }

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) { … }

// This method tries to lift a store instruction before position P.
// It lifts the store and its operands, plus anything else that may alias
// with them.
// It returns true if it was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { … }

bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) { … }

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { … }

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { … }

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign, BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) { … }

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) { … }
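// An illustrative sketch (editor's addition, not from the original source) of
// the call slot optimization performed by performCallSlotOptzn above, in
// C-like pseudocode:
//
//   f(&tmp);              // the call writes its result through the pointer
//   memcpy(dest, tmp, n); // the result is then copied into dest
//
// becomes
//
//   f(&dest);             // the call writes straight into the destination
//
// when dest is at least as large and as aligned as tmp, and neither is read,
// written, or captured in between in a way that would make the rewrite
// observable.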
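// An illustrative sketch (editor's addition, not from the original source) of
// the memcpy-memcpy forwarding performed by processMemCpyMemCpyDependence
// above, in C-like pseudocode:
//
//   memcpy(b, a, n);   // MDep
//   ...
//   memcpy(c, b, n);   // M
//
// becomes
//
//   memcpy(b, a, n);
//   ...
//   memcpy(c, a, n);   // M now reads from 'a', so MDep may later be removed
//
// provided neither 'a' nor 'b' is modified between the two copies.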
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) { … }

/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                             MemoryDef *Def, Value *Size) { … }

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// when dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) { … }

// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
// deleted. This pattern is generated frequently in Rust, due to the ubiquity
// of move operations in that language.
//
// Once we determine that the optimization is safe to perform, we replace all
// uses of the destination alloca with the source alloca. We also "shrink wrap"
// the lifetime markers of the single merged alloca to before the first use
// and after the last use. Note that the "shrink wrapping" procedure is a safe
// transformation only because we restrict the scope of this optimization to
// allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
                                          AllocaInst *DestAlloca,
                                          AllocaInst *SrcAlloca, TypeSize Size,
                                          BatchAAResults &BAA) { … }

static bool isZeroSize(Value *Size) { … }

/// Perform simplification of memcpys. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { … }

/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) { … }

/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { … }
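// An illustrative sketch (editor's addition, not from the original source) of
// the stack-move optimization performed by performStackMoveOptzn above, in
// C-like pseudocode:
//
//   src = alloca T
//   dst = alloca T
//   ... initialize src ...
//   memcpy(dst, src, sizeof(T))   // the "move"; src is unused afterwards
//   ... only dst is used from here on ...
//
// becomes a single alloca used throughout, with the memcpy deleted and the
// lifetime markers shrink-wrapped around the first and last uses.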
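// An illustrative sketch (editor's addition, not from the original source) of
// the byval rewrite performed by processByValArgument above, in C-like
// pseudocode:
//
//   memcpy(tmp, src, n);
//   call f(byval tmp);
//
// becomes
//
//   memcpy(tmp, src, n);   // often dead afterwards
//   call f(byval src);
//
// when 'src' is not modified between the memcpy and the call and its
// alignment satisfies (or can be raised to) the byval requirement.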
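// An illustrative sketch (editor's addition, not from the original source) of
// the rewrite performed by processImmutArgument below, under the conditions
// its comment lists, in C-like pseudocode:
//
//   val = alloca T
//   memcpy(val, src, sizeof(T));
//   call f(nocapture noalias readonly val);
//
// becomes
//
//   call f(nocapture noalias readonly src);   // the memcpy may then be removed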
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during the call. Try to use the memcpy source directly if all of the
/// following conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
///    call.
/// 2. The memcpy dst is an alloca with known alignment & size.
///    2-1. The memcpy length == the alloca size, which ensures that the new
///         pointer is dereferenceable for the required range.
///    2-2. The src pointer has an alignment >= the alloca alignment, or this
///         can be enforced.
/// 3. The memcpy dst and src are not modified between the memcpy and the call
///    (if the MSSA clobber check is safe).
/// 4. The memcpy src is not modified during the call. (The ModRef check shows
///    no Mod.)
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) { … }

/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) { … }

PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { … }

bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                            AliasAnalysis *AA_, AssumptionCache *AC_,
                            DominatorTree *DT_, PostDominatorTree *PDT_,
                            MemorySSA *MSSA_) { … }