llvm/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases that this kicks in, it can be a significant
// performance win.
//
// If compiling for code size we avoid idiom recognition if the resulting
// code could be larger than the code for the original loop. One way this could
// happen is if the loop is not removable after idiom recognition due to the
// presence of non-idiom instructions. The initial implementation of the
// heuristics applies to idioms in multi-block loops.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, strlen, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

usingnamespacellvm;

#define DEBUG_TYPE

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
    NumShiftUntilBitTest,
    "Number of uncountable loops recognized as 'shift until bitttest' idiom");
STATISTIC(NumShiftUntilZero,
          "Number of uncountable loops recognized as 'shift until zero' idiom");

bool DisableLIRP::All;
static cl::opt<bool, true>
    DisableLIRPAll("disable-" DEBUG_TYPE "-all",
                   cl::desc("Options to disable Loop Idiom Recognize Pass."),
                   cl::location(DisableLIRP::All), cl::init(false),
                   cl::ReallyHidden);

bool DisableLIRP::Memset;
static cl::opt<bool, true>
    DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memset."),
                      cl::location(DisableLIRP::Memset), cl::init(false),
                      cl::ReallyHidden);

bool DisableLIRP::Memcpy;
static cl::opt<bool, true>
    DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
                      cl::desc("Proceed with loop idiom recognize pass, but do "
                               "not convert loop(s) to memcpy."),
                      cl::location(DisableLIRP::Memcpy), cl::init(false),
                      cl::ReallyHidden);

static cl::opt<bool> UseLIRCodeSizeHeurs(
    "use-lir-code-size-heurs",
    cl::desc("Use loop idiom recognition code size heuristics when compiling"
             "with -Os/-Oz"),
    cl::init(true), cl::Hidden);

namespace {

class LoopIdiomRecognize {};
} // end anonymous namespace

PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {}

static void deleteDeadInstruction(Instruction *I) {}

//===----------------------------------------------------------------------===//
//
//          Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//

bool LoopIdiomRecognize::runOnLoop(Loop *L) {}

bool LoopIdiomRecognize::runOnCountableLoop() {}

static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {}

/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {}

LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {}

void LoopIdiomRecognize::collectStores(BasicBlock *BB) {}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
    BasicBlock *BB, const SCEV *BECount,
    SmallVectorImpl<BasicBlock *> &ExitBlocks) {}

/// See if this store(s) can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                           const SCEV *BECount, ForMemset For) {}

/// processLoopMemIntrinsic - Template function for calling different processor
/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
    BasicBlock *BB,
    bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
    const SCEV *BECount) {}

/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
                                           const SCEV *BECount) {}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
                                           const SCEV *BECount) {}

/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
                      const SCEV *BECount, const SCEV *StoreSizeSCEV,
                      AliasAnalysis &AA,
                      SmallPtrSetImpl<Instruction *> &IgnoredInsts) {}

// If we have a negative stride, Start refers to the end of the memory location
// we're trying to memset.  Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
                                        Type *IntPtr, const SCEV *StoreSizeSCEV,
                                        ScalarEvolution *SE) {}

/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
                               const SCEV *StoreSizeSCEV, Loop *CurLoop,
                               const DataLayout *DL, ScalarEvolution *SE) {}

/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
    Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
    Value *StoredVal, Instruction *TheStore,
    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
    const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {}

/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy.  This kicks in for stuff like
/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                    const SCEV *BECount) {}

namespace {
class MemmoveVerifier {};
} // namespace

bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
    Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
    MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
    Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
    const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {}

// When compiling for codesize we avoid idiom recognition for a multi-block loop
// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
//
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
                                                   bool IsLoopMemset) {}

bool LoopIdiomRecognize::runOnNoncountableLoop() {}

/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
/// true), the control yields to the loop entry. If the branch matches the
/// behavior, the variable involved in the comparison is returned. This function
/// will be called to see if the precondition and postcondition of the loop are
/// in desirable form.
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
                             bool JmpOnZero = false) {}

/// Check if the given conditional branch is based on an unsigned less-than
/// comparison between a variable and a constant, and if the comparison is false
/// the control yields to the loop entry. If the branch matches the behaviour,
/// the variable involved in the comparison is returned.
static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
                                     APInt &Threshold) {}

// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
                                 BasicBlock *LoopEntry) {}

/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
///       or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
///       or nullptr if there is no such.
/// 3) \p InitX is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
/// 5) \p Threshold is set to the constant involved in the unsigned less-than
///       comparison.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 < 2)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val
///    do {
///      x = phi (x0, x.next);   //PhiX
///      cnt = phi (cnt0, cnt.next)
///
///      cnt.next = cnt + 1;
///       ...
///      x.next = x >> 1;   // DefX
///    } while (x >= 4)
/// loop-exit:
/// \endcode
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
                                          Intrinsic::ID &IntrinID,
                                          Value *&InitX, Instruction *&CntInst,
                                          PHINode *&CntPhi, Instruction *&DefX,
                                          APInt &Threshold) {}

/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction counting the population bit.
/// 2) \p CntPhi is set to the corresponding phi node.
/// 3) \p Var is set to the value whose population bits are being counted.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 != 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x1 = phi (x0, x2);
///       cnt1 = phi(cnt0, cnt2);
///
///       cnt2 = cnt1 + 1;
///        ...
///       x2 = x1 & (x1 - 1);
///        ...
///    } while(x != 0);
///
/// loop-exit:
/// \endcode
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
                                Instruction *&CntInst, PHINode *&CntPhi,
                                Value *&Var) {}

/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
///       or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
///       or nullptr if there is no such.
/// 3) \p Var is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
///
/// The core idiom we are trying to detect is:
/// \code
///    if (x0 == 0)
///      goto loop-exit // the precondition of the loop
///    cnt0 = init-val;
///    do {
///       x = phi (x0, x.next);   //PhiX
///       cnt = phi(cnt0, cnt.next);
///
///       cnt.next = cnt + 1;
///        ...
///       x.next = x >> 1;   // DefX
///        ...
///    } while(x.next != 0);
///
/// loop-exit:
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
                                      Intrinsic::ID &IntrinID, Value *&InitX,
                                      Instruction *&CntInst, PHINode *&CntPhi,
                                      Instruction *&DefX) {}

// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
// profitable if we delete the loop.
bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
                                                 Value *InitX, bool ZeroCheck,
                                                 size_t CanonicalSize) {}

/// Convert CTLZ / CTTZ idiom loop into countable loop.
/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
/// returns false.
bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
                                               Value *InitX, Instruction *DefX,
                                               PHINode *CntPhi,
                                               Instruction *CntInst) {}

/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
/// trip count returns true; otherwise, returns false.
bool LoopIdiomRecognize::recognizeAndInsertFFS() {}

bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {}

/// Recognizes a population count idiom in a non-countable loop.
///
/// If detected, transforms the relevant code to issue the popcount intrinsic
/// function call, and returns true; otherwise, returns false.
bool LoopIdiomRecognize::recognizePopcount() {}

static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
                                       const DebugLoc &DL) {}

static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
                                    const DebugLoc &DL, bool ZeroCheck,
                                    Intrinsic::ID IID) {}

/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   LOOP_BODY
///   Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///
/// Into:
/// If CntPhi used outside the loop:
///   CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
///   Count = CountPrev + 1
/// else
///   Count = BitWidth(InitX) - CTLZ(InitX)
/// loop:
///   CntPhi = PHI [Cnt0, CntInst]
///   PhiX = PHI [InitX, DefX]
///   PhiCount = PHI [Count, Dec]
///   CntInst = CntPhi + 1
///   DefX = PhiX >> 1
///   Dec = PhiCount - 1
///   LOOP_BODY
///   Br: loop if (Dec != 0)
/// Use(CountPrev + Cnt0) // Use(CntPhi)
/// or
/// Use(Count + Cnt0) // Use(CntInst)
///
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
    Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
    PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {}

void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
                                                 Instruction *CntInst,
                                                 PHINode *CntPhi, Value *Var) {}

/// Match loop-invariant value.
template <typename SubPattern_t> struct match_LoopInvariant {};

/// Matches if the value is loop-invariant.
template <typename Ty>
inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {}

/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
///   entry:
///     <...>
///     %bitmask = shl i32 1, %bitpos
///     br label %loop
///
///   loop:
///     %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
///     %x.curr.bitmasked = and i32 %x.curr, %bitmask
///     %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
///     %x.next = shl i32 %x.curr, 1
///     <...>
///     br i1 %x.curr.isbitunset, label %loop, label %end
///
///   end:
///     %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
///     %x.next.res = phi i32 [ %x.next, %loop ] <...>
///     <...>
/// \endcode
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
                                         Value *&BitMask, Value *&BitPos,
                                         Value *&CurrX, Instruction *&NextX) {}

/// Look for the following loop:
/// \code
///   entry:
///     <...>
///     %bitmask = shl i32 1, %bitpos
///     br label %loop
///
///   loop:
///     %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
///     %x.curr.bitmasked = and i32 %x.curr, %bitmask
///     %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
///     %x.next = shl i32 %x.curr, 1
///     <...>
///     br i1 %x.curr.isbitunset, label %loop, label %end
///
///   end:
///     %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
///     %x.next.res = phi i32 [ %x.next, %loop ] <...>
///     <...>
/// \endcode
///
/// And transform it into:
/// \code
///   entry:
///     %bitmask = shl i32 1, %bitpos
///     %lowbitmask = add i32 %bitmask, -1
///     %mask = or i32 %lowbitmask, %bitmask
///     %x.masked = and i32 %x, %mask
///     %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
///                                                         i1 true)
///     %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
///     %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
///     %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
///     %tripcount = add i32 %backedgetakencount, 1
///     %x.curr = shl i32 %x, %backedgetakencount
///     %x.next = shl i32 %x, %tripcount
///     br label %loop
///
///   loop:
///     %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
///     %loop.iv.next = add nuw i32 %loop.iv, 1
///     %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
///     <...>
///     br i1 %loop.ivcheck, label %end, label %loop
///
///   end:
///     %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
///     %x.next.res = phi i32 [ %x.next, %loop ] <...>
///     <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {}

/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
///   entry:
///     <...>
///     %start = <...>
///     %extraoffset = <...>
///     <...>
///     br label %for.cond
///
///   loop:
///     %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
///     %nbits = add nsw i8 %iv, %extraoffset
///     %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
///     %val.shifted.iszero = icmp eq i8 %val.shifted, 0
///     %iv.next = add i8 %iv, 1
///     <...>
///     br i1 %val.shifted.iszero, label %end, label %loop
///
///   end:
///     %iv.res = phi i8 [ %iv, %loop ] <...>
///     %nbits.res = phi i8 [ %nbits, %loop ] <...>
///     %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
///     %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
///     %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
///     <...>
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
                                      Instruction *&ValShiftedIsZero,
                                      Intrinsic::ID &IntrinID, Instruction *&IV,
                                      Value *&Start, Value *&Val,
                                      const SCEV *&ExtraOffsetExpr,
                                      bool &InvertedCond) {}

/// Look for the following loop:
/// \code
///   entry:
///     <...>
///     %start = <...>
///     %extraoffset = <...>
///     <...>
///     br label %for.cond
///
///   loop:
///     %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
///     %nbits = add nsw i8 %iv, %extraoffset
///     %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
///     %val.shifted.iszero = icmp eq i8 %val.shifted, 0
///     %iv.next = add i8 %iv, 1
///     <...>
///     br i1 %val.shifted.iszero, label %end, label %loop
///
///   end:
///     %iv.res = phi i8 [ %iv, %loop ] <...>
///     %nbits.res = phi i8 [ %nbits, %loop ] <...>
///     %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
///     %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
///     %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
///     <...>
/// \endcode
///
/// And transform it into:
/// \code
///   entry:
///     <...>
///     %start = <...>
///     %extraoffset = <...>
///     <...>
///     %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
///     %val.numactivebits = sub i8 8, %val.numleadingzeros
///     %extraoffset.neg = sub i8 0, %extraoffset
///     %tmp = add i8 %val.numactivebits, %extraoffset.neg
///     %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
///     %loop.tripcount = sub i8 %iv.final, %start
///     br label %loop
///
///   loop:
///     %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
///     %loop.iv.next = add i8 %loop.iv, 1
///     %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
///     %iv = add i8 %loop.iv, %start
///     <...>
///     br i1 %loop.ivcheck, label %end, label %loop
///
///   end:
///     %iv.res = phi i8 [ %iv.final, %loop ] <...>
///     <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilZero() {}