//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// This pass performs the following optimizations:
//
// - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g:
//     %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
//     %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
//     ; (%1 can be replaced with a reinterpret of %2)
//
// - optimizes ptest intrinsics where the operands are being needlessly
//   converted to and from svbool_t.
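//   For example (an illustrative sketch, not code taken from this pass; %a
//   and %b are placeholder predicates, and ptest.any stands in for any of
//   the ptest flavours):
//     %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
//     %2 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
//     %3 = @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1,
//                                              <vscale x 16 x i1> %2)
//     ; (%3 can test %a and %b directly, leaving both conversions dead)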
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE …

namespace {
struct SVEIntrinsicOpts : public ModulePass { … };
} // end anonymous namespace

void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { … }

char SVEIntrinsicOpts::ID = …;
static const char *name = …;

INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)

ModulePass *llvm::createSVEIntrinsicOptsPass() { … }

/// Checks if a ptrue intrinsic call is promoted. The act of promoting a ptrue
/// will introduce zeroing. For example:
///
///   %1 = <vscale x 4 x i1> call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
///   %2 = <vscale x 16 x i1> call @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
///   %3 = <vscale x 8 x i1> call @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
///
/// %1 is promoted, because it is converted:
///
///   <vscale x 4 x i1> => <vscale x 16 x i1> => <vscale x 8 x i1>
///
/// via a sequence of the SVE reinterpret intrinsics convert.{to,from}.svbool.
static bool isPTruePromoted(IntrinsicInst *PTrue) { … }

/// Attempts to coalesce ptrues in a basic block.
bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
    BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) { … }

/// The goal of this function is to remove redundant calls to the SVE ptrue
/// intrinsic in each basic block within the given functions.
///
/// SVE ptrues have two representations in LLVM IR:
/// - a logical representation -- an arbitrary-width scalable vector of i1s,
///   i.e. <vscale x N x i1>.
/// - a physical representation (svbool) -- a 16-element scalable vector of
///   i1s, i.e. <vscale x 16 x i1>.
///
/// The SVE ptrue intrinsic is used to create a logical representation of an
/// SVE predicate. Suppose that we have two SVE ptrue intrinsic calls: P1 and
/// P2. If P1 creates a logical SVE predicate that is at least as wide as the
/// logical SVE predicate created by P2, then all of the bits that are true in
/// the physical representation of P2 are necessarily also true in the
/// physical representation of P1. P1 'encompasses' P2; therefore, the
/// intrinsic call to P2 is redundant and can be replaced by an SVE
/// reinterpret of P1 via convert.{to,from}.svbool.
///
/// Currently, this pass only coalesces calls to SVE ptrue intrinsics
/// if they match the following conditions:
///
/// - the call to the intrinsic uses either the SV_ALL or SV_POW2 patterns.
///   SV_ALL indicates that all bits of the predicate vector are to be set to
///   true. SV_POW2 indicates that all bits of the predicate vector up to the
///   largest power-of-two are to be set to true.
/// - the result of the call to the intrinsic is not promoted to a wider
///   predicate. In this case, keeping the extra ptrue leads to better codegen
///   -- coalescing here would create an irreducible chain of SVE reinterprets
///   via convert.{to,from}.svbool.
///
/// EXAMPLE:
///
///   %1 = <vscale x 8 x i1> ptrue(i32 SV_ALL)
///   ; Logical:  <1, 1, 1, 1, 1, 1, 1, 1>
///   ; Physical: <1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0>
///   ...
///
///   %2 = <vscale x 4 x i1> ptrue(i32 SV_ALL)
///   ; Logical:  <1, 1, 1, 1>
///   ; Physical: <1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0>
///   ...
///
/// Here, %2 can be replaced by an SVE reinterpret of %1, giving, for instance:
///
///   %1 = <vscale x 8 x i1> ptrue(i32 31)
///   %2 = <vscale x 16 x i1> convert.to.svbool(<vscale x 8 x i1> %1)
///   %3 = <vscale x 4 x i1> convert.from.svbool(<vscale x 16 x i1> %2)
///
bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
    SmallSetVector<Function *, 4> &Functions) { … }

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable stores as late as possible. (An illustrative sketch of
// the rewrite appears at the end of this file.)
bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) { … }

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable loads as late as possible.
bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) { … }

bool SVEIntrinsicOpts::optimizeInstructions(
    SmallSetVector<Function *, 4> &Functions) { … }

bool SVEIntrinsicOpts::optimizeFunctions(
    SmallSetVector<Function *, 4> &Functions) { … }

bool SVEIntrinsicOpts::runOnModule(Module &M) { … }
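
// For illustration, the flavour of rewrite that optimizePredicateStore aims
// for, shown as a sketch rather than the exact pattern the pass matches. It
// assumes the enclosing function carries vscale_range(2,2), i.e. the vector
// length is known to be exactly 256 bits; %pred and %addr are placeholder
// values:
//
//   %cast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
//   %sub = call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %cast, i64 0)
//   store <4 x i8> %sub, ptr %addr
//
// becomes a single store of the whole predicate:
//
//   store <vscale x 16 x i1> %pred, ptr %addr
//
// optimizePredicateLoad performs the symmetric rewrite for loads.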