MVELaneInterleavingPass.cpp | Explore in Territory

//===- MVELaneInterleaving.cpp - Interleave for MVE instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass interleaves around sext/zext/trunc instructions. MVE does not have
// a single sext/zext or trunc instruction that takes the bottom half of a
// vector and extends to a full width, like NEON has with MOVL. Instead it is
// expected that this happens through top/bottom instructions. So the MVE
// equivalent VMOVLT/B instructions take either the even or odd elements of the
// input and extend them to the larger type, producing a vector with half the
// number of elements each of double the bitwidth. As there is no simple
// instruction, we often have to turn sext/zext/trunc into a series of lane
// moves (or stack loads/stores, which we do not do yet).
//
// This pass takes vector code that starts at truncs, looks for interconnected
// blobs of operations that end with sext/zext (or constants/splats) of the
// form:
//   %sa = sext v8i16 %a to v8i32
//   %sb = sext v8i16 %b to v8i32
//   %add = add v8i32 %sa, %sb
//   %r = trunc %add to v8i16
// And adds shuffles to allow the use of VMOVL/VMOVN instructions:
//   %sha = shuffle v8i16 %a, undef, <0, 2, 4, 6, 1, 3, 5, 7>
//   %sa = sext v8i16 %sha to v8i32
//   %shb = shuffle v8i16 %b, undef, <0, 2, 4, 6, 1, 3, 5, 7>
//   %sb = sext v8i16 %shb to v8i32
//   %add = add v8i32 %sa, %sb
//   %r = trunc %add to v8i16
//   %shr = shuffle v8i16 %r, undef, <0, 4, 1, 5, 2, 6, 3, 7>
// Which can then be split and lowered to MVE instructions efficiently:
//   %sa_b = VMOVLB.s16 %a
//   %sa_t = VMOVLT.s16 %a
//   %sb_b = VMOVLB.s16 %b
//   %sb_t = VMOVLT.s16 %b
//   %add_b = VADD.i32 %sa_b, %sb_b
//   %add_t = VADD.i32 %sa_t, %sb_t
//   %r = VMOVNT.i16 %add_b, %add_t
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE …

// Hidden command-line flag "enable-mve-interleave". It defaults to true and,
// per its description, enables the interleaving lowering of MVE vector
// operations; the code that consults it is not visible in this excerpt.
cl::opt<bool> EnableInterleave(
    "enable-mve-interleave", cl::Hidden, cl::init(true),
    cl::desc("Enable interleave MVE vector operation lowering"));

namespace {

class MVELaneInterleaving : public FunctionPass { … };

} // end anonymous namespace

char MVELaneInterleaving::ID = …;

INITIALIZE_PASS(…)

Pass *llvm::createMVELaneInterleavingPass() { … }

static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts,
                                     SmallSetVector<Instruction *, 4> &Truncs) { … }

static bool tryInterleave(Instruction *Start,
                          SmallPtrSetImpl<Instruction *> &Visited) { … }

// Add reductions are fairly common and associative, meaning we can start the
// interleaving from them and don't need to emit a shuffle.
static bool isAddReduction(Instruction &I) { … }

bool MVELaneInterleaving::runOnFunction(Function &F) { … }
llvm/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp