//===- InstCombineSelect.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitSelect function.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/OverflowInstAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>

#define DEBUG_TYPE …
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace PatternMatch;

/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
                                            const TargetLibraryInfo &TLI,
                                            InstCombinerImpl &IC) { … }

/// This folds:
///   select (icmp eq (and X, C1)), TC, FC
/// iff C1 is a power of 2 and the difference between TC and FC is a power of
/// 2, into something like:
///   (shr (and (X, C1)), (log2(C1) - log2(TC - FC))) + FC
/// or:
///   (shl (and (X, C1)), (log2(TC - FC) - log2(C1))) + FC
/// with some variations depending on whether FC is larger than TC, the shift
/// isn't needed, or the bit widths don't match.
static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
                                InstCombiner::BuilderTy &Builder) { … }
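// One concrete variant of the fold above (illustrative constants: C1 = 8,
// select arms 0 and 2, whose difference is a power of 2):
//   %and = and i32 %x, 8
//   %cmp = icmp eq i32 %and, 0
//   %sel = select i1 %cmp, i32 0, i32 2
// becomes
//   %and = and i32 %x, 8
//   %sel = lshr i32 %and, 2
// since %and is either 0 or 8, and the lshr maps 0 -> 0 and 8 -> 2; the
// trailing add is unnecessary here because one arm is already zero.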
/// We want to turn code that looks like this:
///   %C = or %A, %B
///   %D = select %cond, %C, %A
/// into:
///   %C = select %cond, %B, 0
///   %D = or %A, %C
///
/// Assuming that the specified instruction is an operand to the select, return
/// a bitmask indicating which operands of this instruction are foldable if
/// they equal the other incoming value of the select.
static unsigned getSelectFoldableOperands(BinaryOperator *I) { … }

/// We have (select c, TI, FI), and we know that TI and FI have the same
/// opcode.
Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
                                              Instruction *FI) { … }

static bool isSelect01(const APInt &C1I, const APInt &C2I) { … }

/// Try to fold the select into one of the operands to allow further
/// optimization.
Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
                                                Value *FalseVal) { … }

/// We want to turn:
///   (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
/// into:
///   zext (icmp ne i32 (and X, (or Y, (shl 1, Z))), 0)
/// Note:
///   Z may be 0 if the lshr is missing.
/// Worst-case scenario is that we will replace 5 instructions with 5
/// different instructions, but we get rid of the select.
static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
                                         Value *TVal, Value *FVal,
                                         InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp eq (and X, C1), 0), 0, (shl [nsw/nuw] X, C2))
/// iff C1 is a mask and the number of its leading zeros is equal to C2,
/// into:
///   shl X, C2
static Value *foldSelectICmpAndZeroShl(const ICmpInst *Cmp, Value *TVal,
                                       Value *FVal,
                                       InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp sgt X, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
///   (select (icmp slt X, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
/// into:
///   ashr (X, Y)
static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
                                     Value *FalseVal,
                                     InstCombiner::BuilderTy &Builder) { … }

/// We want to turn:
///   (select (icmp eq (and X, C1), 0), Y, (BinOp Y, C2))
/// into:
///   IF C2 u>= C1
///     (BinOp Y, (shl (and X, C1), C3))
///   ELSE
///     (BinOp Y, (lshr (and X, C1), C3))
/// iff:
///   0 on the RHS is the identity value (i.e., add, xor, shl, etc.)
///   C1 and C2 are both powers of 2
/// where:
///   IF C2 u>= C1
///     C3 = Log(C2) - Log(C1)
///   ELSE
///     C3 = Log(C1) - Log(C2)
///
/// This transform handles cases where:
///   1. The icmp predicate is inverted
///   2. The select operands are reversed
///   3. The magnitudes of C2 and C1 are flipped
static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal,
                                     Value *FalseVal,
                                     InstCombiner::BuilderTy &Builder) { … }

/// Canonicalize a set or clear of a masked set of constant bits to
/// select-of-constants form.
static Instruction *foldSetClearBits(SelectInst &Sel,
                                     InstCombiner::BuilderTy &Builder) { … }

// select (x == 0), 0, x * y --> freeze(y) * x
// select (y == 0), 0, x * y --> freeze(x) * y
// select (x == 0), undef, x * y --> freeze(y) * x
// select (x == undef), 0, x * y --> freeze(y) * x
// Using the mul instead of 0 makes the result more poisonous, so the operand
// that was not checked in the condition should be frozen. The latter folding
// is applied only when the constant compared with x is a vector consisting of
// 0 and undefs. If the constant compared with x is a scalar undefined value
// or an undefined vector, then the expression should already have been folded
// into a constant.
static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) { … }

/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
                                            const Value *TrueVal,
                                            const Value *FalseVal,
                                            InstCombiner::BuilderTy &Builder) { … }

static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
                                       InstCombiner::BuilderTy &Builder) { … }

/// Try to match patterns with select and subtract as absolute difference.
static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,
                          InstCombiner::BuilderTy &Builder) { … }

/// Fold the following code sequence:
/// \code
///   int a = ctlz(x & -x);
///   x ? 31 - a : a;
///   // or
///   x ? 31 - a : 32;
/// \endcode
///
/// into:
///   cttz(x)
static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
                                         Value *FalseVal,
                                         InstCombiner::BuilderTy &Builder) { … }
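// Worked example (illustrative values): for x == 12 (0b1100), x & -x == 4,
// ctlz(4) == 29 for i32, and 31 - 29 == 2 == cttz(12). For x == 0, the false
// arm yields 32 (ctlz of 0, or the literal 32), matching cttz(0, false).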
/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
/// call to cttz/ctlz with flag 'is_zero_poison' cleared.
///
/// For example, we can fold the following code sequence:
/// \code
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
///   %1 = icmp ne i32 %x, 0
///   %2 = select i1 %1, i32 %0, i32 32
/// \endcode
///
/// into:
///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal,
                                 Value *FalseVal, InstCombinerImpl &IC) { … }

static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,
                              InstCombinerImpl &IC) { … }

bool InstCombinerImpl::replaceInInstruction(Value *V, Value *Old, Value *New,
                                            unsigned Depth) { … }

/// If we have a select with an equality comparison, then we know the value in
/// one of the arms of the select. See if substituting this value into an arm
/// and simplifying the result yields the same value as the other arm.
///
/// To make this transform safe, we must drop poison-generating flags
/// (nsw, etc) if we simplified to a binop because the select may be guarding
/// that poison from propagating. If the existing binop already had no
/// poison-generating flags, then this transform can be done by instsimplify.
///
/// Consider:
///   %cmp = icmp eq i32 %x, 2147483647
///   %add = add nsw i32 %x, 1
///   %sel = select i1 %cmp, i32 -2147483648, i32 %add
///
/// We can't replace %sel with %add unless we strip away the flags.
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
                                                          ICmpInst &Cmp) { … }

/// Fold the following code sequence:
/// \code
///   %XeqZ = icmp eq i64 %X, %Z
///   %YeqZ = icmp eq i64 %Y, %Z
///   %XeqY = icmp eq i64 %X, %Y
///   %not.YeqZ = xor i1 %YeqZ, true
///   %and = select i1 %not.YeqZ, i1 %XeqY, i1 false
///   %equal = select i1 %XeqZ, i1 %YeqZ, i1 %and
/// \endcode
///
/// into:
///   %equal = icmp eq i64 %X, %Y
Instruction *InstCombinerImpl::foldSelectEqualityTest(SelectInst &Sel) { … }

// See if this is a pattern like:
//   %old_cmp1 = icmp slt i32 %x, C2
//   %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
//   %old_x_offseted = add i32 %x, C1
//   %old_cmp0 = icmp ult i32 %old_x_offseted, C0
//   %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
// This can be rewritten as a more canonical pattern:
//   %new_cmp1 = icmp slt i32 %x, -C1
//   %new_cmp2 = icmp sge i32 %x, C0-C1
//   %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
//   %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
// iff -C1 s<= C2 s<= C0-C1.
// Also, the ULT predicate can be UGT iff C0 != -1 (+invert result), and the
// SLT predicate can be SGT iff C2 != INT_MAX (+invert result).
static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
                                    InstCombiner::BuilderTy &Builder,
                                    InstCombiner &IC) { … }
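// Worked instance (illustrative constants C1 = 16, C0 = 32, C2 = -16, which
// satisfy -C1 s<= C2 s<= C0-C1): the old form selects %x when %x + 16 u< 32,
// i.e. when -16 s<= %x s< 16, and otherwise clamps to %target_low for
// %x s< -16 or %target_high for %x s>= 16. The canonical form expresses the
// same clamp directly via the two signed comparisons against -16 and 16.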
// If we have:
//   %cmp = icmp [canonical predicate] i32 %x, C0
//   %r = select i1 %cmp, i32 %y, i32 C1
// where C0 != C1 and %x may differ from %y, see if the constant that we will
// have if we flip the strictness of the predicate (i.e., without changing the
// result) is identical to the C1 in the select. If it matches, we can change
// the original comparison to one with the swapped predicate, reuse the
// constant, and swap the arms of the select.
static Instruction *
tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
                                         InstCombinerImpl &IC) { … }

static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal,
                                         Value *FVal,
                                         InstCombiner::BuilderTy &Builder) { … }

static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI,
                                          InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
                                     InstCombinerImpl &IC) { … }

/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
                                                      ICmpInst *ICI) { … }

/// We have an SPF (e.g. a min or max) of an SPF of the form:
///   SPF2(SPF1(A, B), C)
Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner,
                                            SelectPatternFlavor SPF1, Value *A,
                                            Value *B, Instruction &Outer,
                                            SelectPatternFlavor SPF2,
                                            Value *C) { … }

/// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))).
/// This is even legal for FP.
static Instruction *foldAddSubSelect(SelectInst &SI,
                                     InstCombiner::BuilderTy &Builder) { … }

/// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y
/// and X - Y overflows ? 0 : X - Y -> usub_sat X, Y,
/// along with a number of patterns similar to:
///   X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
///   X - Y overflows ? (X > 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
static Instruction *
foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { … }
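// A concrete instance of the first fold above (value names illustrative):
//   %agg = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum = extractvalue { i32, i1 } %agg, 0
//   %ovf = extractvalue { i32, i1 } %agg, 1
//   %r = select i1 %ovf, i32 -1, i32 %sum
// becomes
//   %r = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
// since uadd.sat clamps to the all-ones value (-1) exactly when the plain
// addition would wrap.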
Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) { … }

/// Try to transform a vector select with a constant condition vector into a
/// shuffle for easier combining with other shuffles and insert/extract.
static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { … }

/// If we have a select of vectors with a scalar condition, try to convert that
/// to a vector select by splatting the condition. A splat may get folded with
/// other operations in IR and having all operands of a select be vector types
/// is likely better for vector codegen.
static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel,
                                                   InstCombinerImpl &IC) { … }

/// Reuse bitcasted operands between a compare and select:
///   select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
///   bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))
static Instruction *foldSelectCmpBitcasts(SelectInst &Sel,
                                          InstCombiner::BuilderTy &Builder) { … }

/// Try to eliminate select instructions that test the returned flag of cmpxchg
/// instructions.
///
/// If a select instruction tests the returned flag of a cmpxchg instruction
/// and selects between the returned value of the cmpxchg instruction and its
/// compare operand, the result of the select will always be equal to its false
/// value. For example:
///
///   %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
///   %val = extractvalue { i64, i1 } %cmpxchg, 0
///   %success = extractvalue { i64, i1 } %cmpxchg, 1
///   %sel = select i1 %success, i64 %compare, i64 %val
///   ret i64 %sel
///
/// The returned value of the cmpxchg instruction (%val) is the original value
/// located at %ptr prior to any update. If the cmpxchg operation succeeds,
/// %val must have been equal to %compare. Thus, the result of the select is
/// always equal to %val, and the code can be simplified to:
///
///   %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
///   %val = extractvalue { i64, i1 } %cmpxchg, 0
///   ret i64 %val
///
static Value *foldSelectCmpXchg(SelectInst &SI) { … }

/// Try to reduce a funnel/rotate pattern that includes a compare and select
/// into a funnel shift intrinsic. Example:
///   rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
///                --> call llvm.fshl.i32(a, a, b)
///   fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c)))
///                   --> call llvm.fshl.i32(a, b, c)
///   fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c)))
///                   --> call llvm.fshr.i32(a, b, c)
static Instruction *foldSelectFunnelShift(SelectInst &Sel,
                                          InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectToCopysign(SelectInst &Sel,
                                         InstCombiner::BuilderTy &Builder) { … }

Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { … }

static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB,
                                        const DominatorTree &DT,
                                        InstCombiner::BuilderTy &Builder) { … }

static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
                                    InstCombiner::BuilderTy &Builder) { … }

/// Tries to reduce a pattern that arises when calculating the remainder of the
/// Euclidean division. When the divisor is a power of two and is guaranteed
/// not to be negative, a signed remainder can be folded with a bitwise and.
///
///   (x % n) < 0 ? (x % n) + n : (x % n)
///     -> x & (n - 1)
static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
                                       IRBuilderBase &Builder) { … }

static Value *foldSelectWithFrozenICmp(SelectInst &Sel,
                                       InstCombiner::BuilderTy &Builder) { … }

/// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.
static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,
                                                    Value *CondVal,
                                                    bool CondIsTrue,
                                                    const DataLayout &DL) { … }

Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
                                                                 SelectInst &SI,
                                                                 bool IsAnd) { … }

// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
                                             InstCombinerImpl &IC) { … }
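// Illustrative shape of one accepted form (value names illustrative):
//   %cmp = fcmp olt float %x, 0.0
//   %neg = fneg float %x
//   %fabs = select nsz i1 %cmp, float %neg, float %x
// becomes
//   %fabs = call float @llvm.fabs.f32(float %x)
// The nsz requirement is what makes this sound: for %x == -0.0 the compare is
// false and the select yields -0.0, whereas llvm.fabs yields +0.0; nsz lets
// us ignore that sign-of-zero difference.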
// Match the following IR pattern:
//   %x.lowbits = and i8 %x, %lowbitmask
//   %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
//   %x.biased = add i8 %x, %bias
//   %x.biased.highbits = and i8 %x.biased, %highbitmask
//   %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
// Define:
//   %alignment = add i8 %lowbitmask, 1
// Iff 1. %alignment is a power of two (aka, %lowbitmask is a low bit mask),
// and 2. %bias is equal to either %lowbitmask or %alignment,
// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment),
// then this pattern can be transformed into:
//   %x.offset = add i8 %x, %lowbitmask
//   %x.roundedup = and i8 %x.offset, %highbitmask
static Value *
foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
                                    InstCombiner::BuilderTy &Builder) { … }

namespace {
struct DecomposedSelect { … };
} // namespace

/// Folds patterns like:
///   select c2 (select c1 a b) (select c1 b a)
/// into:
///   select (xor c1 c2) b a
static Instruction *
foldSelectOfSymmetricSelect(SelectInst &OuterSelVal,
                            InstCombiner::BuilderTy &Builder) { … }

/// Look for patterns like
///   %outer.cond = select i1 %inner.cond, i1 %alt.cond, i1 false
///   %inner.sel = select i1 %inner.cond, i8 %inner.sel.t, i8 %inner.sel.f
///   %outer.sel = select i1 %outer.cond, i8 %outer.sel.t, i8 %inner.sel
/// and rewrite it as
///   %inner.sel = select i1 %alt.cond, i8 %outer.sel.t, i8 %inner.sel.t
///   %outer.sel = select i1 %inner.cond, i8 %inner.sel, i8 %inner.sel.f
static Instruction *foldNestedSelects(SelectInst &OuterSelVal,
                                      InstCombiner::BuilderTy &Builder) { … }

Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { … }

// Return true if we can safely remove the select instruction for the
// std::bit_ceil pattern.
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                        const APInt *Cond1, Value *CtlzOp,
                                        unsigned BitWidth,
                                        bool &ShouldDropNUW) { … }

// Transform the std::bit_ceil(X) pattern like:
//
//   %dec = add i32 %x, -1
//   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
//   %sub = sub i32 32, %ctlz
//   %shl = shl i32 1, %sub
//   %ugt = icmp ugt i32 %x, 1
//   %sel = select i1 %ugt, i32 %shl, i32 1
//
// into:
//
//   %dec = add i32 %x, -1
//   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
//   %neg = sub i32 0, %ctlz
//   %masked = and i32 %neg, 31
//   %shl = shl i32 1, %masked
//
// Note that the select is optimized away while the shift count is masked with
// 31. We handle some variations of the input operand like std::bit_ceil(X +
// 1).
static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { … }

// This function tries to fold the following operations:
//   (x < y) ? -1 : zext(x != y)
//   (x < y) ? -1 : zext(x > y)
//   (x > y) ? 1 : sext(x != y)
//   (x > y) ? 1 : sext(x < y)
// into ucmp/scmp(x, y), where signedness is determined by the signedness
// of the comparison in the original sequence.
Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) { … }

bool InstCombinerImpl::fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF,
                                        const Instruction *CtxI) const { … }

static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
                                          Value *Cmp1, Value *TrueVal,
                                          Value *FalseVal, Instruction &CtxI,
                                          bool SelectIsNSZ) { … }

/// Check whether the KnownBits of a select arm may be affected by the
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
                             unsigned Depth) { … }

Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { … }