llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
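/// For example (illustrative): i1, i8, and i16 arguments promote to i32,
/// mirroring C's default argument promotions; i32 and wider types are
/// returned unchanged.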
static Type *getPromotedType(Type *Ty) {}

/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
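/// For example (illustrative IR):
///   %a = alloca [16 x i8]
///   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %a, i64 16, i1 false)
/// If %a is never written, the copy only reads undef bytes.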
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {}

Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {}

Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {}

// TODO, Obvious Missing Transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {}

// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {}

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {}

/// This function transforms launder.invariant.group and strip.invariant.group
/// intrinsics as follows:
/// launder(launder(%x)) -> launder(%x)       (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x)             (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {}

static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {}

static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {}

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
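/// For example (illustrative IR):
///   call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %t,
///     <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
/// becomes a shufflevector of %t with mask <7, 6, 5, 4, 3, 2, 1, 0>.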
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {}

// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {}

Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {}

static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {}

/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
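/// For example, for a call to llvm.sadd.with.overflow.i32 this produces the
/// aggregate { i32 Result, i1 Overflow } expected by its extractvalue users
/// (illustrative shape of the returned tuple).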
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                        Constant *Overflow) {}

Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {}

static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {}

static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {}

/// \returns the compare predicate type if the test performed by
/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
/// floating-point environment assumed for \p F for type \p Ty
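/// For example (illustrative): with IEEE denormal input handling,
/// llvm.is.fpclass(x, fcZero) performs the same test as fcmp oeq x, 0.0.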
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
                                              const Function &F, Type *Ty) {}

Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {}

static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {}

static std::optional<bool> getKnownSignOrZero(Value *Op,
                                              const SimplifyQuery &SQ) {}

/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
                                 const SimplifyQuery &SQ) {}

/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
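/// For example (illustrative): smax(X + 10, 42) --> smax(X, 32) + 10,
/// given suitable no-overflow facts on the add.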
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {}

/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
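/// For example (illustrative): an i32 add of two i8-ranged values clamped by
/// smax(..., -128) and smin(..., 127) can become an i8 @llvm.sadd.sat call.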
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {}

/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
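/// For example (illustrative): smax(smin(X, 42), 41) is always 41 or 42, so
/// it can become: select (icmp sgt X, 41), 42, 41.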
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
                                        InstCombiner::BuilderTy &Builder) {}

/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
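/// For example (illustrative): smin(smin(X, 7), 5) --> smin(X, 5), folding
/// the two constants with smin(7, 5) == 5.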
static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
                                             IRBuilderBase &Builder,
                                             const SimplifyQuery &SQ) {}

/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
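/// For example (illustrative): smax(smax(X, C), Y) --> smax(smax(X, Y), C),
/// exposing C to further constant folds at the outer level.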
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {}

/// Reduce a sequence of min/max intrinsics with a common operand.
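/// For example (illustrative): smax(smax(X, Y), smax(X, Z)) can be reduced
/// to smax(smax(Y, Z), X), removing one intrinsic call.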
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {}

/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
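/// For example (illustrative):
///   smax(shuffle(X, M), shuffle(Y, M)) --> shuffle(smax(X, Y), M)
/// when each shuffle is unary and uses the same mask M.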
static Instruction *
foldShuffledIntrinsicOperands(IntrinsicInst *II,
                              InstCombiner::BuilderTy &Builder) {}

/// Fold the following cases (accepts both bswap and bitreverse intrinsics):
///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
template <Intrinsic::ID IntrID>
static Instruction *foldBitOrderCrossLogicOp(Value *V,
                                             InstCombiner::BuilderTy &Builder) {}

static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {}

/// Fold an unsigned minimum of trailing or leading zero bits counts:
///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
///                                              >> ConstOp))
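/// For example (illustrative, i32): umin(cttz(%x, i1 false), 4) becomes
/// cttz(%x | 16, i1 true), since setting bit 4 caps the trailing-zero count
/// at 4 and makes the argument provably non-zero.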
template <Intrinsic::ID IntrID>
static Value *
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
                                          const DataLayout &DL,
                                          InstCombiner::BuilderTy &Builder) {}

/// CallInst simplification. This mostly handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {}

// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {}

// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {}

// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {}

Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {}

static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {}

static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {}

bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
                                            const TargetLibraryInfo *TLI) {}

/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {}

/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/invoke.
/// CallBrInst is not supported.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {}

/// Turn a call to a function created by the init_trampoline /
/// adjust_trampoline intrinsic pair into a direct call to the underlying
/// function.
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
                                                 IntrinsicInst &Tramp) {}