//===- MVETailPredication.cpp - MVE Tail Predication ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
/// branches to help accelerate DSP applications. These two extensions,
/// combined with a new form of predication called tail-predication, can be used
/// to provide implicit vector predication within a low-overhead loop.
/// This is implicit because the predicate of active/inactive lanes is
/// calculated by hardware, and thus does not need to be explicitly passed
/// to vector instructions. The instructions responsible for this are the
/// DLSTP and WLSTP instructions, which set up a tail-predicated loop and the
/// total number of data elements processed by the loop. The loop-end
/// LETP instruction is responsible for decrementing and setting the remaining
/// elements to be processed and generating the mask of active lanes.
///
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
/// predicated upon a get.active.lane.mask intrinsic. This pass looks at these
/// get.active.lane.mask intrinsics and attempts to convert them to VCTP
/// instructions. This will be picked up by the ARM Low-overhead loop pass later
/// in the backend, which performs the final transformation to a DLSTP or WLSTP
/// tail-predicated loop.
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMSubtarget.h" #include "ARMTargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" usingnamespacellvm; #define DEBUG_TYPE … #define DESC … cl::opt<TailPredication::Mode> EnableTailPredication( "tail-predication", cl::desc("MVE tail-predication pass options"), cl::init(TailPredication::Enabled), cl::values(clEnumValN(TailPredication::Disabled, "disabled", "Don't tail-predicate loops"), clEnumValN(TailPredication::EnabledNoReductions, "enabled-no-reductions", "Enable tail-predication, but not for reduction loops"), clEnumValN(TailPredication::Enabled, "enabled", "Enable tail-predication, including reduction loops"), clEnumValN(TailPredication::ForceEnabledNoReductions, "force-enabled-no-reductions", "Enable tail-predication, but not for reduction loops, " "and force this which might be unsafe"), clEnumValN(TailPredication::ForceEnabled, "force-enabled", "Enable tail-predication, including reduction loops, " "and force this which might be unsafe"))); namespace { class MVETailPredication : public LoopPass { … }; } // end namespace bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { … } // The active lane intrinsic has this form: // // 
//    @llvm.get.active.lane.mask(IV, TC)
//
// Here we perform checks that this intrinsic behaves as expected,
// which means:
//
// 1) Check that the TripCount (TC) belongs to this loop (originally).
// 2) The element count (TC) needs to be sufficiently large that the decrement
//    of element counter doesn't overflow, which means that we need to prove:
//        ceil(ElementCount / VectorWidth) >= TripCount
//    by rounding ElementCount up:
//        (ElementCount + (VectorWidth - 1)) / VectorWidth
//    and evaluate if expression isKnownNonNegative:
//        ((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
// 3) The IV must be an induction phi with an increment equal to the
//    vector width.
//
// Takes the get.active.lane.mask call and the loop's TripCount value and
// returns a `const SCEV *`. The body is elided ("…") in this view of the
// file, so what the returned SCEV denotes (and whether a null result signals
// "not safe") cannot be stated from here -- TODO confirm against the body.
const SCEV *MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
                                                 Value *TripCount) { … }

// Takes the get.active.lane.mask call and a Start value. Going by the name
// and the file header, this presumably emits the VCTP intrinsic that stands
// in for ActiveLaneMask -- confirm against the body, which is elided ("…")
// in this view of the file.
void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
                                             Value *Start) { … }

// Takes the loop's TripCount value and returns a bool; presumably true when
// a conversion was performed -- TODO confirm. Body elided ("…") in this view.
bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) { … }

// Public factory declared in namespace llvm; returns a Pass pointer.
// Body elided ("…") in this view of the file.
Pass *llvm::createMVETailPredicationPass() { … }

// Static pass-identifier member; its initialiser is elided ("…") in this view.
char MVETailPredication::ID = …;

// Pass registration under the DEBUG_TYPE name and DESC description. No
// dependency entries appear between the BEGIN and END macros.
INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false)
INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false)