PPCISelDAGToDAG.cpp | Explore in Territory

//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

usingnamespacellvm;

#define DEBUG_TYPE …
#define PASS_NAME …

// Counters for this pass, declared with the STATISTIC macro. Each entry pairs
// a counter identifier with the description string given alongside it.
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
// Hidden boolean option registered as "expose-ppc-andi-glue-bug". Unlike the
// other options visible in this part of the file it is not declared `static`,
// so the symbol has external linkage.
// NOTE(review): presumably referenced from another PPC source file -- confirm
// before making it static.
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

// Hidden boolean option "ppc-use-bit-perm-rewriter", initialised to true.
// Per its description it turns on the aggressive instruction selection for
// bit permutations.
static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
// Hidden boolean option "ppc-bit-perm-rewriter-stress-rotates". Per its
// description it stresses rotate selection in the aggressive bit-permutation
// selector. No cl::init is supplied here.
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

// Hidden boolean option "ppc-use-branch-hint", initialised to true. Per its
// description it enables static hinting of branches.
static cl::opt<bool> EnableBranchHint(
  "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

// Hidden boolean option "ppc-tls-opt", initialised to true. Per its
// description it enables the TLS optimization peephole.
static cl::opt<bool> EnableTLSOpt(
  "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

enum ICmpInGPRType { … };

// Hidden option "ppc-gpr-icmps" selecting which kinds of integer comparison
// get GPR-only code. It is initialised to ICGPR_All; the accepted spellings
// and their meanings are the cl::values entries listed below.
static cl::opt<ICmpInGPRType> CmpInGPR(
  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
             clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
             clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
             clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
             clEnumValN(ICGPR_NonExtIn, "nonextin",
                        "Only comparisons where inputs don't need [sz]ext."),
             clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
             clEnumValN(ICGPR_ZextI32, "zexti32",
                        "Only i32 comparisons with zext result."),
             clEnumValN(ICGPR_ZextI64, "zexti64",
                        "Only i64 comparisons with zext result."),
             clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
             clEnumValN(ICGPR_SextI32, "sexti32",
                        "Only i32 comparisons with sext result."),
             clEnumValN(ICGPR_SextI64, "sexti64",
                        "Only i64 comparisons with sext result.")));
namespace {

  //===--------------------------------------------------------------------===//
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel { … };

  class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy { … };
} // end anonymous namespace

char PPCDAGToDAGISelLegacy::ID = …;

INITIALIZE_PASS(…)

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { … }

// Check if a SDValue has the toc-data attribute.
static bool hasTocDataAttr(SDValue Val) { … }

static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget,
                                     const TargetMachine &TM,
                                     const SDNode *Node) { … }

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) { … }

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand.  If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { … }

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) { … }

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) { … }

static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) { … }

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { … }

void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) { … }

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) { … }

// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
// instruction use the thread pointer.
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) { … }

// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
// operation, can be optimized to use an X-Form load or store, allowing the
// ADD_TLS node to be removed completely.
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) { … }

bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { … }

bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { … }

/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { … }

static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { … }

// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and return the number of bits to the left of these consecutive zeros.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { … }

// Direct materialization of 64-bit constants by enumerated patterns.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) { … }

// Try to select instructions to generate a 64 bit immediate using prefix as
// well as non prefix instructions. The function will return the SDNode
// to materialize that constant or it will return nullptr if it does not
// find one. The variable InstCnt is set to the number of instructions that
// were selected.
static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
                                        uint64_t Imm, unsigned &InstCnt) { … }

static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) { … }

// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { … }

namespace {

class BitPermutationSelector { … };

class IntegerCompareEliminator { … };

// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { … }

// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { … }

// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { … }

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { … }

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { … }

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
// course not actual zero/sign extensions that will generate machine code,
// they're just a way to reinterpret a 32 bit value in a register as a
// 64 bit value and vice-versa.
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
                                                ExtOrTruncConversion Conv) { … }

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) { … }

/// Produces a zero-extended result of comparing two 32-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) { … }

/// Produces a sign-extended result of comparing two 32-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) { … }

/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) { … }

/// Produces a sign-extended result of comparing two 64-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) { … }

/// Do all uses of this SDValue need the result in a GPR?
/// This is meant to be used on values that have type i1 since
/// it is somewhat meaningless to ask if values of other types
/// should be kept in GPR's.
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { … }

/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// values is a power of two while the other is zero.
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
                                                SetccInGPROpts ConvOpts) { … }

} // end anonymous namespace

bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { … }

bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { … }

/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                  const SDLoc &dl, SDValue Chain) { … }

static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
                                           const PPCSubtarget *Subtarget) { … }

/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted.  That is, lt = 0; ge = 0 inverted.
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { … }

// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
// and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) { … }

bool PPCDAGToDAGISel::trySETCC(SDNode *N) { … }

/// Does this node represent a load/store node whose address can be represented
/// with a register plus an immediate that's a multiple of \p Val?
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { … }

void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { … }

static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) { … }

// Return true if it's a software square-root/divide operand.
static bool isSWTestOp(SDValue N) { … }

bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) { … }

bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { … }

bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { … }

// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) { … }

// If the target supports the cmpb instruction, do the idiom recognition here.
// We don't do this as a DAG combine because we don't want to do it as nodes
// are being combined (because we might miss part of the eventual idiom). We
// don't want to do it during instruction selection because we want to reuse
// the logic for lowering the masking operations already part of the
// instruction selector.
SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { … }

// When CR bit registers are enabled, an extension of an i1 variable to an i32
// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
// involves constant materialization of a 0 or a 1 or both. If the result of
// the extension is then operated upon by some operator that can be constant
// folded with a constant 0 or 1, and that constant can be materialized using
// only one instruction (like a zero or one), then we should fold in those
// operations with the select.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { … }

void PPCDAGToDAGISel::PreprocessISelDAG() { … }

/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() { … }

// Check if all users of this node will become isel where the second operand
// is the constant zero. If this is so, and if we can negate the condition,
// then we can flip the true and false operands. This will allow the zero to
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { … }

void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { … }

void PPCDAGToDAGISel::PeepholeCROps() { … }

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
static bool PeepholePPC64ZExtGather(SDValue Op32,
                                    SmallPtrSetImpl<SDNode *> &ToPromote) { … }

void PPCDAGToDAGISel::PeepholePPC64ZExt() { … }

static bool isVSXSwap(SDValue N) { … }

// TODO: Make this complete and replace with a table-gen bit.
static bool isLaneInsensitive(SDValue N) { … }

// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
// lane-insensitive.
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) { … }

// Check if an SDValue has the 'aix-small-tls' global variable attribute.
static bool hasAIXSmallTLSAttr(SDValue Val) { … }

// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
// accesses?
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
                                                       SDValue ADDIToFold) { … }

// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
// another addi, fold this sequence into a single addi if possible. Before this
// optimization, the sequence appears as:
//    addi rN, r13, sym@[le|ld]
//    addi rM, rN, imm
// After this optimization, we can fold the two addi into a single one:
//    addi rM, r13, sym@[le|ld] + imm
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) { … }

void PPCDAGToDAGISel::PeepholePPC64() { … }

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) { … }
llvm/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp