llvm/llvm/lib/CodeGen/ExpandMemCmp.cpp

//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to expand memcmp() calls into optimally-sized loads and
// compares for the target.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandMemCmp.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <optional>

usingnamespacellvm;
usingnamespacellvm::PatternMatch;

namespace llvm {
class TargetLowering;
}

#define DEBUG_TYPE

STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
STATISTIC(NumMemCmpGreaterThanMax,
          "Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
    cl::desc("The number of loads per basic block for inline expansion of "
             "memcmp that is only being compared against zero."));

static cl::opt<unsigned> MaxLoadsPerMemcmp(
    "max-loads-per-memcmp", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp"));

static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
    "max-loads-per-memcmp-opt-size", cl::Hidden,
    cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));

namespace {


// This class provides helper functions to expand a memcmp library call into an
// inline expansion.
class MemCmpExpansion {};

MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
    uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
    const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {}

MemCmpExpansion::LoadEntryVector
MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
                                                const unsigned MaxLoadSize,
                                                const unsigned MaxNumLoads,
                                                unsigned &NumLoadsNonOneByte) {}

void MemCmpExpansion::optimiseLoadSequence(
    LoadEntryVector &LoadSequence,
    const TargetTransformInfo::MemCmpExpansionOptions &Options,
    bool IsUsedForZeroCmp) {}

// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
// 1. A list of load compare blocks - LoadCmpBlocks.
// 2. An EndBlock, split from original instruction point, which is the block to
// return from.
// 3. ResultBlock, block to branch to for early exit when a
// LoadCmpBlock finds a difference.
MemCmpExpansion::MemCmpExpansion(
    CallInst *const CI, uint64_t Size,
    const TargetTransformInfo::MemCmpExpansionOptions &Options,
    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
    DomTreeUpdater *DTU)
    :{}

unsigned MemCmpExpansion::getNumBlocks() {}

void MemCmpExpansion::createLoadCmpBlocks() {}

void MemCmpExpansion::createResultBlock() {}

MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
                                                       Type *BSwapSizeType,
                                                       Type *CmpSizeType,
                                                       unsigned OffsetBytes) {}

// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                               unsigned OffsetBytes) {}

/// Generate an equality comparison for one or more pairs of loaded values.
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
                                            unsigned &LoadIndex) {}

void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                                        unsigned &LoadIndex) {}

// This function creates the IR intructions for loading and comparing using the
// given LoadSize. It loads the number of bytes specified by LoadSize from each
// source of the memcmp parameters. It then does a subtract to see if there was
// a difference in the loaded values. If a difference is found, it branches
// with an early exit to the ResultBlock for calculating which source was
// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
// a special case through emitLoadCompareByteBlock. The special handling can
// simply subtract the loaded values and add it to the result phi node.
void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {}

// This function populates the ResultBlock with a sequence to calculate the
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {}

void MemCmpExpansion::setupResultBlockPHINodes() {}

void MemCmpExpansion::setupEndBlockPHINodes() {}

Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {}

/// A memcmp expansion that compares equality with 0 and only has one block of
/// load and compare can bypass the compare, branch, and phi IR that is required
/// in the general case.
Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {}

/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
/// This function also analyses users of memcmp, and if there is only one user
/// from which we can conclude that only 2 out of 3 memcmp outcomes really
/// matter, then it generates more efficient code with only one comparison.
Value *MemCmpExpansion::getMemCmpOneBlock() {}

// This function expands the memcmp call into an inline expansion and returns
// the memcmp result. Returns nullptr if the memcmp is already replaced.
Value *MemCmpExpansion::getMemCmpExpansion() {}

// This function checks to see if an expansion of memcmp can be generated.
// It checks for constant compare size that is less than the max inline size.
// If an expansion cannot occur, returns false to leave as a library call.
// Otherwise, the library call is replaced with a new IR instruction sequence.
/// We want to transform:
/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
/// To:
/// loadbb:
///  %0 = bitcast i32* %buffer2 to i8*
///  %1 = bitcast i32* %buffer1 to i8*
///  %2 = bitcast i8* %1 to i64*
///  %3 = bitcast i8* %0 to i64*
///  %4 = load i64, i64* %2
///  %5 = load i64, i64* %3
///  %6 = call i64 @llvm.bswap.i64(i64 %4)
///  %7 = call i64 @llvm.bswap.i64(i64 %5)
///  %8 = sub i64 %6, %7
///  %9 = icmp ne i64 %8, 0
///  br i1 %9, label %res_block, label %loadbb1
/// res_block:                                        ; preds = %loadbb2,
/// %loadbb1, %loadbb
///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
///  %10 = icmp ult i64 %phi.src1, %phi.src2
///  %11 = select i1 %10, i32 -1, i32 1
///  br label %endblock
/// loadbb1:                                          ; preds = %loadbb
///  %12 = bitcast i32* %buffer2 to i8*
///  %13 = bitcast i32* %buffer1 to i8*
///  %14 = bitcast i8* %13 to i32*
///  %15 = bitcast i8* %12 to i32*
///  %16 = getelementptr i32, i32* %14, i32 2
///  %17 = getelementptr i32, i32* %15, i32 2
///  %18 = load i32, i32* %16
///  %19 = load i32, i32* %17
///  %20 = call i32 @llvm.bswap.i32(i32 %18)
///  %21 = call i32 @llvm.bswap.i32(i32 %19)
///  %22 = zext i32 %20 to i64
///  %23 = zext i32 %21 to i64
///  %24 = sub i64 %22, %23
///  %25 = icmp ne i64 %24, 0
///  br i1 %25, label %res_block, label %loadbb2
/// loadbb2:                                          ; preds = %loadbb1
///  %26 = bitcast i32* %buffer2 to i8*
///  %27 = bitcast i32* %buffer1 to i8*
///  %28 = bitcast i8* %27 to i16*
///  %29 = bitcast i8* %26 to i16*
///  %30 = getelementptr i16, i16* %28, i16 6
///  %31 = getelementptr i16, i16* %29, i16 6
///  %32 = load i16, i16* %30
///  %33 = load i16, i16* %31
///  %34 = call i16 @llvm.bswap.i16(i16 %32)
///  %35 = call i16 @llvm.bswap.i16(i16 %33)
///  %36 = zext i16 %34 to i64
///  %37 = zext i16 %35 to i64
///  %38 = sub i64 %36, %37
///  %39 = icmp ne i64 %38, 0
///  br i1 %39, label %res_block, label %loadbb3
/// loadbb3:                                          ; preds = %loadbb2
///  %40 = bitcast i32* %buffer2 to i8*
///  %41 = bitcast i32* %buffer1 to i8*
///  %42 = getelementptr i8, i8* %41, i8 14
///  %43 = getelementptr i8, i8* %40, i8 14
///  %44 = load i8, i8* %42
///  %45 = load i8, i8* %43
///  %46 = zext i8 %44 to i32
///  %47 = zext i8 %45 to i32
///  %48 = sub i32 %46, %47
///  br label %endblock
/// endblock:                                         ; preds = %res_block,
/// %loadbb3
///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
///  ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                         const TargetLowering *TLI, const DataLayout *DL,
                         ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
                         DomTreeUpdater *DTU, const bool IsBCmp) {}

// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                       const TargetTransformInfo *TTI, const TargetLowering *TL,
                       const DataLayout &DL, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);

static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                                 const TargetTransformInfo *TTI,
                                 const TargetLowering *TL,
                                 ProfileSummaryInfo *PSI,
                                 BlockFrequencyInfo *BFI, DominatorTree *DT);

class ExpandMemCmpLegacyPass : public FunctionPass {};

bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                const TargetTransformInfo *TTI, const TargetLowering *TL,
                const DataLayout &DL, ProfileSummaryInfo *PSI,
                BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {}

PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                          const TargetTransformInfo *TTI,
                          const TargetLowering *TL, ProfileSummaryInfo *PSI,
                          BlockFrequencyInfo *BFI, DominatorTree *DT) {}

} // namespace

PreservedAnalyses ExpandMemCmpPass::run(Function &F,
                                        FunctionAnalysisManager &FAM) {}

char ExpandMemCmpLegacyPass::ID =;
INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
                      "Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
                    "Expand memcmp() to load/stores", false, false)

FunctionPass *llvm::createExpandMemCmpLegacyPass() {}