InstCombineSelect.cpp | Explore in Territory

//===- InstCombineSelect.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitSelect function.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/OverflowInstAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>

#define DEBUG_TYPE …
#include "llvm/Transforms/Utils/InstructionWorklist.h"

usingnamespacellvm;
usingnamespacePatternMatch;


/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
                                            const TargetLibraryInfo &TLI,
                                            InstCombinerImpl &IC) { … }

/// This folds:
///  select (icmp eq (and X, C1)), TC, FC
///    iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
/// To something like:
///  (shr (and (X, C1)), (log2(C1) - log2(TC-FC))) + FC
/// Or:
///  (shl (and (X, C1)), (log2(TC-FC) - log2(C1))) + FC
/// With some variations depending if FC is larger than TC, or the shift
/// isn't needed, or the bit widths don't match.
static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
                                InstCombiner::BuilderTy &Builder) { … }

/// We want to turn code that looks like this:
///   %C = or %A, %B
///   %D = select %cond, %C, %A
/// into:
///   %C = select %cond, %B, 0
///   %D = or %A, %C
///
/// Assuming that the specified instruction is an operand to the select, return
/// a bitmask indicating which operands of this instruction are foldable if they
/// equal the other incoming value of the select.
static unsigned getSelectFoldableOperands(BinaryOperator *I) { … }

/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
                                              Instruction *FI) { … }

static bool isSelect01(const APInt &C1I, const APInt &C2I) { … }

/// Try to fold the select into one of the operands to allow further
/// optimization.
Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
                                                Value *FalseVal) { … }

/// We want to turn:
///   (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
/// into:
///   zext (icmp ne i32 (and X, (or Y, (shl 1, Z))), 0)
/// Note:
///   Z may be 0 if lshr is missing.
/// Worst-case scenario is that we will replace 5 instructions with 5 different
/// instructions, but we got rid of select.
static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
                                         Value *TVal, Value *FVal,
                                         InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp eq (and X, C1), 0), 0, (shl [nsw/nuw] X, C2));
///   iff C1 is a mask and the number of its leading zeros is equal to C2
/// into:
///   shl X, C2
static Value *foldSelectICmpAndZeroShl(const ICmpInst *Cmp, Value *TVal,
                                       Value *FVal,
                                       InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp sgt x, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
///   (select (icmp slt x, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
/// into:
///   ashr (X, Y)
static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
                                     Value *FalseVal,
                                     InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp eq (and X, C1), 0), Y, (BinOp Y, C2))
/// into:
///   IF C2 u>= C1
///     (BinOp Y, (shl (and X, C1), C3))
///   ELSE
///     (BinOp Y, (lshr (and X, C1), C3))
/// iff:
///   0 on the RHS is the identity value (i.e add, xor, shl, etc...)
///   C1 and C2 are both powers of 2
/// where:
///   IF C2 u>= C1
///     C3 = Log(C2) - Log(C1)
///   ELSE
///     C3 = Log(C1) - Log(C2)
///
/// This transform handles cases where:
/// 1. The icmp predicate is inverted
/// 2. The select operands are reversed
/// 3. The magnitude of C2 and C1 are flipped
static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal,
                                  Value *FalseVal,
                                  InstCombiner::BuilderTy &Builder) { … }

/// Canonicalize a set or clear of a masked set of constant bits to
/// select-of-constants form.
static Instruction *foldSetClearBits(SelectInst &Sel,
                                     InstCombiner::BuilderTy &Builder) { … }

//   select (x == 0), 0, x * y --> freeze(y) * x
//   select (y == 0), 0, x * y --> freeze(x) * y
//   select (x == 0), undef, x * y --> freeze(y) * x
//   select (x == undef), 0, x * y --> freeze(y) * x
// Usage of mul instead of 0 will make the result more poisonous,
// so the operand that was not checked in the condition should be frozen.
// The latter folding is applied only when a constant compared with x is
// is a vector consisting of 0 and undefs. If a constant compared with x
// is a scalar undefined value or undefined vector then an expression
// should be already folded into a constant.
static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) { … }

/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
                                            const Value *TrueVal,
                                            const Value *FalseVal,
                                            InstCombiner::BuilderTy &Builder) { … }

static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
                                       InstCombiner::BuilderTy &Builder) { … }

/// Try to match patterns with select and subtract as absolute difference.
static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,
                          InstCombiner::BuilderTy &Builder) { … }

/// Fold the following code sequence:
/// \code
///   int a = ctlz(x & -x);
//    x ? 31 - a : a;
//    // or
//    x ? 31 - a : 32;
/// \code
///
/// into:
///   cttz(x)
static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
                                         Value *FalseVal,
                                         InstCombiner::BuilderTy &Builder) { … }

/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
/// call to cttz/ctlz with flag 'is_zero_poison' cleared.
///
/// For example, we can fold the following code sequence:
/// \code
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
///   %1 = icmp ne i32 %x, 0
///   %2 = select i1 %1, i32 %0, i32 32
/// \code
///
/// into:
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
                                 InstCombinerImpl &IC) { … }

static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,
                              InstCombinerImpl &IC) { … }

bool InstCombinerImpl::replaceInInstruction(Value *V, Value *Old, Value *New,
                                            unsigned Depth) { … }

/// If we have a select with an equality comparison, then we know the value in
/// one of the arms of the select. See if substituting this value into an arm
/// and simplifying the result yields the same value as the other arm.
///
/// To make this transform safe, we must drop poison-generating flags
/// (nsw, etc) if we simplified to a binop because the select may be guarding
/// that poison from propagating. If the existing binop already had no
/// poison-generating flags, then this transform can be done by instsimplify.
///
/// Consider:
///   %cmp = icmp eq i32 %x, 2147483647
///   %add = add nsw i32 %x, 1
///   %sel = select i1 %cmp, i32 -2147483648, i32 %add
///
/// We can't replace %sel with %add unless we strip away the flags.
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
                                                          ICmpInst &Cmp) { … }

// See if this is a pattern like:
//   %old_cmp1 = icmp slt i32 %x, C2
//   %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
//   %old_x_offseted = add i32 %x, C1
//   %old_cmp0 = icmp ult i32 %old_x_offseted, C0
//   %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
// This can be rewritten as more canonical pattern:
//   %new_cmp1 = icmp slt i32 %x, -C1
//   %new_cmp2 = icmp sge i32 %x, C0-C1
//   %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
//   %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
// Iff -C1 s<= C2 s<= C0-C1
// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)
//      SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
                                    InstCombiner::BuilderTy &Builder,
                                    InstCombiner &IC) { … }

// If we have
//  %cmp = icmp [canonical predicate] i32 %x, C0
//  %r = select i1 %cmp, i32 %y, i32 C1
// Where C0 != C1 and %x may be different from %y, see if the constant that we
// will have if we flip the strictness of the predicate (i.e. without changing
// the result) is identical to the C1 in select. If it matches we can change
// original comparison to one with swapped predicate, reuse the constant,
// and swap the hands of select.
static Instruction *
tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
                                         InstCombinerImpl &IC) { … }

static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal,
                                         Value *FVal,
                                         InstCombiner::BuilderTy &Builder) { … }

static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI,
                                          InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
                                     InstCombinerImpl &IC) { … }

/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
                                                      ICmpInst *ICI) { … }

/// We have an SPF (e.g. a min or max) of an SPF of the form:
///   SPF2(SPF1(A, B), C)
Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner,
                                            SelectPatternFlavor SPF1, Value *A,
                                            Value *B, Instruction &Outer,
                                            SelectPatternFlavor SPF2,
                                            Value *C) { … }

/// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))).
/// This is even legal for FP.
static Instruction *foldAddSubSelect(SelectInst &SI,
                                     InstCombiner::BuilderTy &Builder) { … }

/// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y
/// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y
/// Along with a number of patterns similar to:
/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
/// X - Y overflows ? (X > 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
static Instruction *
foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { … }

Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { … }

/// Try to transform a vector select with a constant condition vector into a
/// shuffle for easier combining with other shuffles and insert/extract.
static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { … }

/// If we have a select of vectors with a scalar condition, try to convert that
/// to a vector select by splatting the condition. A splat may get folded with
/// other operations in IR and having all operands of a select be vector types
/// is likely better for vector codegen.
static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel,
                                                   InstCombinerImpl &IC) { … }

/// Reuse bitcasted operands between a compare and select:
/// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
/// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))
static Instruction *foldSelectCmpBitcasts(SelectInst &Sel,
                                          InstCombiner::BuilderTy &Builder) { … }

/// Try to eliminate select instructions that test the returned flag of cmpxchg
/// instructions.
///
/// If a select instruction tests the returned flag of a cmpxchg instruction and
/// selects between the returned value of the cmpxchg instruction its compare
/// operand, the result of the select will always be equal to its false value.
/// For example:
///
///   %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
///   %val = extractvalue { i64, i1 } %cmpxchg, 0
///   %success = extractvalue { i64, i1 } %cmpxchg, 1
///   %sel = select i1 %success, i64 %compare, i64 %val
///   ret i64 %sel
///
/// The returned value of the cmpxchg instruction (%val) is the original value
/// located at %ptr prior to any update. If the cmpxchg operation succeeds, %val
/// must have been equal to %compare. Thus, the result of the select is always
/// equal to %val, and the code can be simplified to:
///
///   %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
///   %val = extractvalue { i64, i1 } %cmpxchg, 0
///   ret i64 %val
///
static Value *foldSelectCmpXchg(SelectInst &SI) { … }

/// Try to reduce a funnel/rotate pattern that includes a compare and select
/// into a funnel shift intrinsic. Example:
/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
///              --> call llvm.fshl.i32(a, a, b)
/// fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c)))
///                 --> call llvm.fshl.i32(a, b, c)
/// fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c)))
///                 --> call llvm.fshr.i32(a, b, c)
static Instruction *foldSelectFunnelShift(SelectInst &Sel,
                                          InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectToCopysign(SelectInst &Sel,
                                         InstCombiner::BuilderTy &Builder) { … }

Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { … }

static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB,
                                        const DominatorTree &DT,
                                        InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
                                    InstCombiner::BuilderTy &Builder) { … }

/// Tries to reduce a pattern that arises when calculating the remainder of the
/// Euclidean division. When the divisor is a power of two and is guaranteed not
/// to be negative, a signed remainder can be folded with a bitwise and.
///
/// (x % n) < 0 ? (x % n) + n : (x % n)
///    -> x & (n - 1)
static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
                                       IRBuilderBase &Builder) { … }

static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) { … }

/// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.
static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,
                                                    Value *CondVal,
                                                    bool CondIsTrue,
                                                    const DataLayout &DL) { … }

Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
                                                                 SelectInst &SI,
                                                                 bool IsAnd) { … }

// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
                                             InstCombinerImpl &IC) { … }

// Match the following IR pattern:
//   %x.lowbits = and i8 %x, %lowbitmask
//   %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
//   %x.biased = add i8 %x, %bias
//   %x.biased.highbits = and i8 %x.biased, %highbitmask
//   %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
// Define:
//   %alignment = add i8 %lowbitmask, 1
// Iff 1. an %alignment is a power-of-two (aka, %lowbitmask is a low bit mask)
// and 2. %bias is equal to either %lowbitmask or %alignment,
// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment)
// then this pattern can be transformed into:
//   %x.offset = add i8 %x, %lowbitmask
//   %x.roundedup = and i8 %x.offset, %highbitmask
static Value *
foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
                                    InstCombiner::BuilderTy &Builder) { … }

namespace {
struct DecomposedSelect { … };
} // namespace

/// Folds patterns like:
///   select c2 (select c1 a b) (select c1 b a)
/// into:
///   select (xor c1 c2) b a
static Instruction *
foldSelectOfSymmetricSelect(SelectInst &OuterSelVal,
                            InstCombiner::BuilderTy &Builder) { … }

/// Look for patterns like
///   %outer.cond = select i1 %inner.cond, i1 %alt.cond, i1 false
///   %inner.sel = select i1 %inner.cond, i8 %inner.sel.t, i8 %inner.sel.f
///   %outer.sel = select i1 %outer.cond, i8 %outer.sel.t, i8 %inner.sel
/// and rewrite it as
///   %inner.sel = select i1 %cond.alternative, i8 %sel.outer.t, i8 %sel.inner.t
///   %sel.outer = select i1 %cond.inner, i8 %inner.sel, i8 %sel.inner.f
static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
                                      InstCombiner::BuilderTy &Builder) { … }

Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { … }

// Return true if we can safely remove the select instruction for std::bit_ceil
// pattern.
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                        const APInt *Cond1, Value *CtlzOp,
                                        unsigned BitWidth,
                                        bool &ShouldDropNUW) { … }

// Transform the std::bit_ceil(X) pattern like:
//
//   %dec = add i32 %x, -1
//   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
//   %sub = sub i32 32, %ctlz
//   %shl = shl i32 1, %sub
//   %ugt = icmp ugt i32 %x, 1
//   %sel = select i1 %ugt, i32 %shl, i32 1
//
// into:
//
//   %dec = add i32 %x, -1
//   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
//   %neg = sub i32 0, %ctlz
//   %masked = and i32 %ctlz, 31
//   %shl = shl i32 1, %sub
//
// Note that the select is optimized away while the shift count is masked with
// 31.  We handle some variations of the input operand like std::bit_ceil(X +
// 1).
static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { … }

// This function tries to fold the following operations:
//   (x < y) ? -1 : zext(x != y)
//   (x < y) ? -1 : zext(x > y)
//   (x > y) ? 1 : sext(x != y)
//   (x > y) ? 1 : sext(x < y)
// Into ucmp/scmp(x, y), where signedness is determined by the signedness
// of the comparison in the original sequence.
Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) { … }

bool InstCombinerImpl::fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF,
                                        const Instruction *CtxI) const { … }

static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
                                          Value *Cmp1, Value *TrueVal,
                                          Value *FalseVal, Instruction &CtxI,
                                          bool SelectIsNSZ) { … }

/// Check whether the KnownBits of a select arm may be affected by the
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
                             unsigned Depth) { … }

Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { … }
llvm/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp