//===- AggressiveInstCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the aggressive expression pattern combiner classes.
// Currently, it handles expression patterns for:
//  * Truncate instruction
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE …

STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
          "Number of guarded rotates transformed into funnel shifts");
STATISTIC(NumGuardedFunnelShifts,
          "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");

static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
    cl::desc("Max number of instructions to scan for aggressive instcombine."));

static cl::opt<unsigned> StrNCmpInlineThreshold(
    "strncmp-inline-threshold", cl::init(3), cl::Hidden,
    cl::desc("The maximum length of a constant string for a builtin string cmp "
             "call eligible for inlining. The default value is 3."));

static cl::opt<unsigned>
    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
                          cl::desc("The maximum length of a constant string to "
                                   "inline a memchr call."));

/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) { … }

/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a
/// chain of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
struct MaskOps { … };

/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
/// chain of 'and' or 'or' instructions looking for shift ops of a common
/// source value. Examples:
///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
/// returns { X, 0x129 }
///   and (and (X >> 1), 1), (X >> 4)
/// returns { X, 0x12 }
static bool matchAndOrChain(Value *V, MaskOps &MOps) { … }
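// An illustrative (hypothetical) source-level shape that produces the chains
// matchAndOrChain walks; the mask values follow the examples above:
//
//   bool any_set(unsigned x) { return ((x | (x >> 3) | (x >> 5)) & 1) != 0; }
//
// After earlier simplifications this is a chain of 'or'-ed shifts of a single
// value masked with 1, which foldAnyOrAllBitsSet (below) turns into one
// mask-and-compare: (x & 0x29) != 0.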
/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
/// These will include a chain of 'or' or 'and'-shifted bits from a
/// common source value:
/// and (or  (lshr X, C), ...), 1 --> (X & CMask) != 0
/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
/// that differ only with a final 'not' of the result. We expect that final
/// 'not' to be folded with the compare that we create here (invert predicate).
static bool foldAnyOrAllBitsSet(Instruction &I) { … }

// Try to recognize the function below as a popcount intrinsic.
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Also used in TargetLowering::expandCTPOP().
//
// int popcount(unsigned int i) {
//   i = i - ((i >> 1) & 0x55555555);
//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
//   i = ((i + (i >> 4)) & 0x0F0F0F0F);
//   return (i * 0x01010101) >> 24;
// }
static bool tryToRecognizePopCount(Instruction &I) { … }

/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversible
/// as the fptosi.sat is more defined than the input - all values produce a
/// valid result for the fptosi.sat, whereas inputs that were out of range of
/// the integer conversion produce poison for the original. The reversed
/// pattern may use fmax and fmin instead. As we cannot directly reverse the
/// transform, and it is not always profitable, we make it conditional on the
/// cost being reported as lower by TTI.
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { … }

/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, AssumptionCache &AC,
                     DominatorTree &DT) { … }

// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
                        uint64_t Shift, uint64_t InputBits) { … }
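// A rough sketch (for illustration only; see the implementation for the exact
// bounds and the handling of the x == 0 element) of the property isCTTZTable
// verifies: every bit position must map, via the multiply-and-shift indexing
// used by the de Bruijn-style implementations described below, to a table
// entry holding that position.
//
//   for (uint64_t Bit = 0; Bit < InputBits; ++Bit)
//     if (Table[((1ULL << Bit) * Mul) >> Shift] != Bit) // product truncated
//       return false;                                   // to the input width
//   return true;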
// Try to recognize a table-based ctz implementation.
// E.g., an example in C (for more cases please see the llvm/tests):
//   int f(unsigned x) {
//     static const char table[32] =
//       {0, 1, 28, 2, 29, 14, 24, 3, 30,
//        22, 20, 15, 25, 17, 4, 8, 31, 27,
//        13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
//     return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
//   }
// this can be lowered to the `cttz` instruction.
// There is also a special case when the element is 0.
//
// Here are some examples of LLVM IR for a 64-bit target:
//
// CASE 1:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 125613361
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
//     i64 %idxprom
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// CASE 2:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 72416175
// %shr = lshr i32 %mul, 26
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
//     i64 0, i64 %idxprom
// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
//
// CASE 3:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 81224991
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
//     i64 0, i64 %idxprom
// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
//
// CASE 4:
// %sub = sub i64 0, %x
// %and = and i64 %sub, %x
// %mul = mul i64 %and, 283881067100198605
// %shr = lshr i64 %mul, 58
// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
//     i64 %shr
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
static bool tryToRecognizeTableBasedCttz(Instruction &I) { … }

/// This is used by foldLoadsRecursive() to capture a Root Load node which is
/// of type or(load, load) and recursively build the wide load. Also capture
/// the shift amount, zero extend type and loadSize.
struct LoadOps { … };

// Identify and merge consecutive loads recursively, matching patterns of the
// form
// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
                               AliasAnalysis &AA) { … }

// For a given BB instruction, evaluate all loads in the chain that form a
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA,
                                 const DominatorTree &DT) { … }

// Calculate the GEP Stride and the accumulated constant ModOffset. Return
// Stride and ModOffset.
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) { … }

/// If C is a constant patterned array and all valid loaded results for the
/// given alignment are equal to the same constant, return that constant.
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { … }
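// An illustrative (hypothetical) case for foldPatternedLoads: with
//
//   @g = internal constant [4 x i32] [i32 7, i32 7, i32 7, i32 7]
//   %p = getelementptr inbounds [4 x i32], ptr @g, i64 0, i64 %i
//   %v = load i32, ptr %p, align 4
//
// every in-bounds, 4-byte-aligned load reads the value 7, so %v can be
// replaced by the constant 7 even though the index %i is unknown.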
namespace {
class StrNCmpInliner { … };
} // namespace

/// First we normalize calls to strncmp/strcmp to the form of
/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
/// (without considering '\0').
///
/// Examples:
///
/// \code
/// strncmp(s, "a", 3) -> compare(s, "a", 2)
/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
/// strcmp(s, "a") -> compare(s, "a", 2)
///
/// char s2[] = {'a'}
/// strncmp(s, s2, 3) -> compare(s, s2, 3)
///
/// char s2[] = {'a', 'b', 'c', 'd'}
/// strncmp(s, s2, 3) -> compare(s, s2, 3)
/// \endcode
///
/// We only handle cases where N and exactly one of s1 and s2 are constant.
/// Cases where both s1 and s2 are constant are already handled by the
/// instcombine pass.
///
/// We do not handle cases where N > StrNCmpInlineThreshold.
///
/// We also do not handle cases where N < 2, which are already
/// handled by the instcombine pass.
///
bool StrNCmpInliner::optimizeStrNCmp() { … }

/// Convert
///
/// \code
/// ret = compare(s1, s2, N)
/// \endcode
///
/// into
///
/// \code
/// ret = (int)s1[0] - (int)s2[0]
/// if (ret != 0)
///   goto NE
/// ...
/// ret = (int)s1[N-2] - (int)s2[N-2]
/// if (ret != 0)
///   goto NE
/// ret = (int)s1[N-1] - (int)s2[N-1]
/// NE:
/// \endcode
///
/// CFG before and after the transformation:
///
/// (before)
/// BBCI
///
/// (after)
/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
///                 |                    ^
///                 E                    |
///                 |                    |
///        BBSubs[1] (sub,icmp) --NE-----+
///                ...                   |
///        BBSubs[N-1] (sub) ---------+
///
void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
                                   bool Swapped) { … }

/// Convert memchr with a small constant string into a switch.
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
                       const DataLayout &DL) { … }

static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
                         TargetLibraryInfo &TLI, AssumptionCache &AC,
                         DominatorTree &DT, const DataLayout &DL,
                         bool &MadeCFGChange) { … }

/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                TargetTransformInfo &TTI,
                                TargetLibraryInfo &TLI, AliasAnalysis &AA,
                                AssumptionCache &AC, bool &MadeCFGChange) { … }

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                    TargetLibraryInfo &TLI, DominatorTree &DT,
                    AliasAnalysis &AA, bool &MadeCFGChange) { … }

PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) { … }
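// For reference, a minimal (hypothetical) way to schedule this pass when
// assembling a custom new-pass-manager pipeline; the standard optimization
// pipelines already include it:
//
//   FunctionPassManager FPM;
//   FPM.addPass(AggressiveInstCombinePass());
//
// The MadeCFGChange flag threaded through runImpl lets run() report a more
// precise PreservedAnalyses set when only non-CFG transforms fired.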