//===- MVELaneInterleaving.cpp - Interleave for MVE instructions ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass interleaves around sext/zext/trunc instructions. MVE does not have // a single sext/zext or trunc instruction that takes the bottom half of a // vector and extends to a full width, like NEON has with MOVL. Instead it is // expected that this happens through top/bottom instructions. So the MVE // equivalent VMOVLT/B instructions take either the even or odd elements of the // input and extend them to the larger type, producing a vector with half the // number of elements each of double the bitwidth. As there is no simple // instruction, we often have to turn sext/zext/trunc into a series of lane // moves (or stack loads/stores, which we do not do yet). 
// // This pass takes vector code that starts at truncs, looks for interconnected // blobs of operations that end with sext/zext (or constants/splats) of the // form: // %sa = sext v8i16 %a to v8i32 // %sb = sext v8i16 %b to v8i32 // %add = add v8i32 %sa, %sb // %r = trunc %add to v8i16 // And adds shuffles to allow the use of VMOVL/VMOVN instructions: // %sha = shuffle v8i16 %a, undef, <0, 2, 4, 6, 1, 3, 5, 7> // %sa = sext v8i16 %sha to v8i32 // %shb = shuffle v8i16 %b, undef, <0, 2, 4, 6, 1, 3, 5, 7> // %sb = sext v8i16 %shb to v8i32 // %add = add v8i32 %sa, %sb // %r = trunc %add to v8i16 // %shr = shuffle v8i16 %r, undef, <0, 4, 1, 5, 2, 6, 3, 7> // Which can then be split and lowered to MVE instructions efficiently: // %sa_b = VMOVLB.s16 %a // %sa_t = VMOVLT.s16 %a // %sb_b = VMOVLB.s16 %b // %sb_t = VMOVLT.s16 %b // %add_b = VADD.i32 %sa_b, %sb_b // %add_t = VADD.i32 %sa_t, %sb_t // %r = VMOVNT.i16 %add_b, %add_t // //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include <algorithm> #include <cassert> usingnamespacellvm; #define DEBUG_TYPE … cl::opt<bool> 
EnableInterleave(
    // Hidden command-line switch "enable-mve-interleave"; initialised to true.
    "enable-mve-interleave", cl::Hidden, cl::init(true),
    cl::desc("Enable interleave MVE vector operation lowering"));

namespace {

// Function pass that carries out the MVE lane interleaving described in the
// file header comment.
class MVELaneInterleaving : public FunctionPass { … };

} // end anonymous namespace

// Pass identification member of MVELaneInterleaving.
char MVELaneInterleaving::ID = …;

INITIALIZE_PASS(…)

// Factory for the pass, defined in the llvm namespace.
// NOTE(review): presumably returns a newly created MVELaneInterleaving
// instance - confirm against the definition.
Pass *llvm::createMVELaneInterleavingPass() { … }

// Profitability check for one candidate group of instructions.
// Exts and Truncs are the instruction sets gathered for the candidate
// (presumably the extend and truncate instructions bounding it - confirm).
// NOTE(review): presumably returns true when interleaving is worthwhile.
static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts,
                                     SmallSetVector<Instruction *, 4> &Truncs) { … }

// Attempts the interleaving transform beginning at Start.
// Visited is a caller-owned set of instructions shared across calls
// (presumably those already examined, so they are not processed twice -
// confirm).
// NOTE(review): presumably returns true when the IR was changed.
static bool tryInterleave(Instruction *Start,
                          SmallPtrSetImpl<Instruction *> &Visited) { … }

// Add reductions are fairly common and associative, meaning we can start the
// interleaving from them and don't need to emit a shuffle.
static bool isAddReduction(Instruction &I) { … }

// Per-function entry point of the pass. By the FunctionPass convention the
// returned bool reports whether F was modified.
bool MVELaneInterleaving::runOnFunction(Function &F) { … }