AArch64AdvSIMDScalarPass.cpp | Explore in Territory

//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// When profitable, replace GPR targeting i64 instructions with their
// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
// as minimizing the number of cross-class register copies.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TODO: Graph based predicate heuristics.
// Walking the instruction list linearly will get many, perhaps most, of
// the cases, but to do a truly thorough job of this, we need a more
// wholistic approach.
//
// This optimization is very similar in spirit to the register allocator's
// spill placement, only here we're determining where to place cross-class
// register copies rather than spills. As such, a similar approach is
// called for.
//
// We want to build up a set of graphs of all instructions which are candidates
// for transformation along with instructions which generate their inputs and
// consume their outputs. For each edge in the graph, we assign a weight
// based on whether there is a copy required there (weight zero if not) and
// the block frequency of the block containing the defining or using
// instruction, whichever is less. Our optimization is then a graph problem
// to minimize the total weight of all the graphs, then transform instructions
// and add or remove copy instructions as called for to implement the
// solution.
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
usingnamespacellvm;

#define DEBUG_TYPE …

// Allow forcing all i64 operations with equivalent SIMD instructions to use
// them. For stress-testing the transformation function.
static cl::opt<bool>
TransformAll("aarch64-simd-scalar-force-all",
             cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
             cl::init(false), cl::Hidden);

STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");

#define AARCH64_ADVSIMD_NAME …

namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass { … };
char AArch64AdvSIMDScalar::ID = …;
} // end anonymous namespace

INITIALIZE_PASS(…)

static bool isGPR64(unsigned Reg, unsigned SubReg,
                    const MachineRegisterInfo *MRI) { … }

static bool isFPR64(unsigned Reg, unsigned SubReg,
                    const MachineRegisterInfo *MRI) { … }

// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
// copy instruction. Return nullptr if the instruction is not a copy.
static MachineOperand *getSrcFromCopy(MachineInstr *MI,
                                      const MachineRegisterInfo *MRI,
                                      unsigned &SubReg) { … }

// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
// that we're considering transforming to, return that AdvSIMD opcode. For all
// others, return the original opcode.
static unsigned getTransformOpcode(unsigned Opc) { … }

static bool isTransformable(const MachineInstr &MI) { … }

// isProfitableToTransform - Predicate function to determine whether an
// instruction should be transformed to its equivalent AdvSIMD scalar
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
bool AArch64AdvSIMDScalar::isProfitableToTransform(
    const MachineInstr &MI) const { … }

static MachineInstr *insertCopy(const TargetInstrInfo *TII, MachineInstr &MI,
                                unsigned Dst, unsigned Src, bool IsKill) { … }

// transformInstruction - Perform the transformation of an instruction
// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { … }

// processMachineBasicBlock - Main optimzation loop.
bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { … }

// runOnMachineFunction - Pass entry point from PassManager.
bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { … }

// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine
// to add the pass to the PassManager.
FunctionPass *llvm::createAArch64AdvSIMDScalar() { … }
llvm/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp