MLRegAllocEvictAdvisor.cpp | Explore in Territory

//===- MLRegAllocEvictAdvisor.cpp - ML eviction advisor -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the ML eviction advisor and reward injection pass
//
//===----------------------------------------------------------------------===//

#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#endif
#include "MLRegAllocEvictAdvisor.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"

#include <array>
#include <bitset>
#include <memory>

usingnamespacellvm;

#define DEBUG_TYPE …

// Generated header in release (AOT) mode
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
#include "RegAllocEvictModel.h"
using CompiledModelType = RegAllocEvictModel;
#else
CompiledModelType;
#endif

static cl::opt<std::string> InteractiveChannelBaseName(
    "regalloc-evict-interactive-channel-base", cl::Hidden,
    cl::desc(
        "Base file path for the interactive mode. The incoming filename should "
        "have the name <regalloc-evict-interactive-channel-base>.in, while the "
        "outgoing name should be "
        "<regalloc-evict-interactive-channel-base>.out"));

// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
#include "llvm/Analysis/Utils/TFUtils.h"

static cl::opt<std::string> TrainingLog(
    "regalloc-training-log", cl::Hidden,
    cl::desc("Training log for the register allocator eviction model"));

static cl::opt<std::string> ModelUnderTraining(
    "regalloc-model", cl::Hidden,
    cl::desc("The model being trained for register allocation eviction"));

static cl::opt<bool> EnableDevelopmentFeatures(
    "regalloc-enable-development-features", cl::Hidden,
    cl::desc("Whether or not to enable features under development for the ML "
             "regalloc advisor"));

#else
static const bool EnableDevelopmentFeatures = …;
#endif // #ifdef LLVM_HAVE_TFLITE

/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log, but
/// this happens only in development mode. It's a no-op otherwise.
namespace llvm {
extern cl::opt<unsigned> EvictInterferenceCutoff;

class RegAllocScoring : public MachineFunctionPass { … };

char RegAllocScoring::ID = …;
FunctionPass *createRegAllocScoringPass() { … }

} // namespace llvm

INITIALIZE_PASS(…)

// ===================================
// Common ML Advisor declarations
// ===================================
namespace {
// The model can only accept a specified number of opcodes and will error it if
// fed an opcode it hasn't seen before. This constant sets the current cutoff.
static const int OpcodeValueCutoff = …;

// Most features are as described above, so we'll reuse this vector in defining
// them.
static const std::vector<int64_t> PerLiveRangeShape{ … };

// --------------
// Features table
// --------------
// For each interfering live range (incl. the candidate) we collect a number of
// features. However, because the features are of different types (and because
// of ML best practices), we organize the tensors per feature, not per
// candidate. Each such tensor has a scalar value corresponding to the
// interferring live range at that position, in the order in AllocationOrder.
// The last position corresponds to the virt reg seeking allocation.
// Exception to all that is the progression feature, which is just a scalar (see
// its documentation for details).
// Note on naming: the "_by_max" are normalized using the largest value of that
// tensor, as observed in the current decision making stage (i.e. for the
// current call to the advisor's tryFindEvictionCandidate)
//
// The feature list format: type, name, shape, documentation.
// Note: we can really just use int64 and float, hence the modeling of some
// bools as int64 values.
#define RA_EVICT_FEATURES_LIST(M) …

#ifdef LLVM_HAVE_TFLITE
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE …

#define RA_EVICT_REST_DEVELOPMENT_FEATURES …
#else
#define RA_EVICT_FIRST_DEVELOPMENT_FEATURE(M) …
#define RA_EVICT_REST_DEVELOPMENT_FEATURES(M) …
#endif

// The model learns to pick one of the mask == 1 interferences. This is the
// name of the output tensor. The contract with the model is that the output
// will be guaranteed to be to a mask == 1 position. Using a macro here to
// avoid 'not used' warnings (and keep cond compilation to a minimum)
#define DecisionName …
static const TensorSpec DecisionSpec = …;

// Named features index.
enum FeatureIDs { … };

// The ML advisor will typically have a sparse input to the evaluator, because
// various phys regs won't be available. It's easier (maintenance-wise) to
// bulk-reset the state of the evaluator each time we are about to use it
// again.
template <typename T> size_t getTotalSize(const std::vector<int64_t> &Shape) { … }

void resetInputs(MLModelRunner &Runner) { … }

// Per-live interval components that get aggregated into the feature values
// that will be passed to the evaluator.
struct LIFeatureComponents { … };

CandidateRegList;
FeaturesListNormalizer;

/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor { … };

#define _DECL_FEATURES(type, name, shape, _) …

// ===================================
// Release (AOT) - specifics
// ===================================
class ReleaseModeEvictionAdvisorAnalysis final
    : public RegAllocEvictionAdvisorAnalysis { … };

// ===================================
// Development mode-specifics
// ===================================
//
// Features we log
#ifdef LLVM_HAVE_TFLITE
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});

// Features we bind on the model. The tensor names have a prefix, and we also
// need to include some tensors that are expected to be present by the
// training algo.
// TODO: can we just get rid of these?
#define _DECL_TRAIN_FEATURES …

class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
  DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                              MLModelRunner *Runner,
                              const MachineBlockFrequencyInfo &MBFI,
                              const MachineLoopInfo &Loops, Logger *Log)
      : MLEvictAdvisor(MF, RA, Runner, MBFI, Loops), Log(Log) {}

private:
  int64_t tryFindEvictionCandidatePosition(
      const LiveInterval &VirtReg, const AllocationOrder &Order,
      unsigned OrderLimit, uint8_t CostPerUseLimit,
      const SmallVirtRegSet &FixedRegisters) const override;

  Logger *const Log;
};

class DevelopmentModeEvictionAdvisorAnalysis final
    : public RegAllocEvictionAdvisorAnalysis {
public:
  DevelopmentModeEvictionAdvisorAnalysis()
      : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {
    if (EnableDevelopmentFeatures) {
      InputFeatures = {RA_EVICT_FEATURES_LIST(
          _DECL_FEATURES) RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_FEATURES)
                           RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_FEATURES)};
      TrainingInputFeatures = {
          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
              RA_EVICT_FIRST_DEVELOPMENT_FEATURE(_DECL_TRAIN_FEATURES)
                  RA_EVICT_REST_DEVELOPMENT_FEATURES(_DECL_TRAIN_FEATURES)
                      TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
    } else {
      InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)};
      TrainingInputFeatures = {
          RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES)
              TensorSpec::createSpec<float>("action_discount", {1}),
          TensorSpec::createSpec<int32_t>("action_step_type", {1}),
          TensorSpec::createSpec<float>("action_reward", {1})};
    }
  }
  // support for isa<> and dyn_cast.
  static bool classof(const RegAllocEvictionAdvisorAnalysis *R) {
    return R->getAdvisorMode() == AdvisorMode::Development;
  }

  void logRewardIfNeeded(const MachineFunction &MF,
                         llvm::function_ref<float()> GetReward) override {
    if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
      return;
    // The function pass manager would run all the function passes for a
    // function, so we assume the last context belongs to this function. If
    // this invariant ever changes, we can implement at that time switching
    // contexts. At this point, it'd be an error
    if (Log->currentContext() != MF.getName()) {
      MF.getFunction().getContext().emitError(
          "The training log context shouldn't have had changed.");
    }
    if (Log->hasObservationInProgress())
      Log->logReward<float>(GetReward());
  }

private:
  std::vector<TensorSpec> InputFeatures;
  std::vector<TensorSpec> TrainingInputFeatures;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    RegAllocEvictionAdvisorAnalysis::getAnalysisUsage(AU);
  }

  bool doInitialization(Module &M) override {
    LLVMContext &Ctx = M.getContext();
    if (ModelUnderTraining.empty() && TrainingLog.empty()) {
      Ctx.emitError("Regalloc development mode should be requested with at "
                    "least logging enabled and/or a training model");
      return false;
    }
    if (ModelUnderTraining.empty())
      Runner = std::make_unique<NoInferenceModelRunner>(Ctx, InputFeatures);
    else
      Runner = ModelUnderTrainingRunner::createAndEnsureValid(
          Ctx, ModelUnderTraining, DecisionName, TrainingInputFeatures);
    if (!Runner) {
      Ctx.emitError("Regalloc: could not set up the model runner");
      return false;
    }
    if (TrainingLog.empty())
      return false;
    std::error_code EC;
    auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
    if (EC) {
      M.getContext().emitError(EC.message() + ":" + TrainingLog);
      return false;
    }
    std::vector<TensorSpec> LFS = InputFeatures;
    if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(Runner.get()))
      append_range(LFS, MUTR->extraOutputsForLoggingSpecs());
    // We always log the output; in particular, if we're not evaluating, we
    // don't have an output spec json file. That's why we handle the
    // 'normal' output separately.
    LFS.push_back(DecisionSpec);

    Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
                                   /*IncludeReward*/ true);
    return false;
  }

  std::unique_ptr<RegAllocEvictionAdvisor>
  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
    if (!Runner)
      return nullptr;
    if (Log)
      Log->switchContext(MF.getName());
    return std::make_unique<DevelopmentModeEvictAdvisor>(
        MF, RA, Runner.get(),
        getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
        getAnalysis<MachineLoopInfoWrapperPass>().getLI(), Log.get());
  }

  std::unique_ptr<MLModelRunner> Runner;
  std::unique_ptr<Logger> Log;
};

#endif //#ifdef LLVM_HAVE_TFLITE
} // namespace

float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { … }

MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                               MLModelRunner *Runner,
                               const MachineBlockFrequencyInfo &MBFI,
                               const MachineLoopInfo &Loops)
    : … { … }

int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
    const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
    const SmallVirtRegSet &) const { … }

bool MLEvictAdvisor::loadInterferenceFeatures(
    const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
    const SmallVirtRegSet &FixedRegisters,
    llvm::SmallVectorImpl<float> &Largest, size_t Pos,
    llvm::SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { … }

MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
    const LiveInterval &VirtReg, const AllocationOrder &Order,
    uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { … }

const LIFeatureComponents &
MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const { … }

// Overall, this currently mimics what we do for weight calculation, but instead
// of accummulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
    const SmallVectorImpl<const LiveInterval *> &Intervals,
    llvm::SmallVectorImpl<float> &Largest, size_t Pos, int64_t IsHint,
    int64_t LocalIntfsCount, float NumUrgent,
    SmallVectorImpl<LRStartEndInfo> &LRPosInfo) const { … }

void llvm::extractInstructionFeatures(
    SmallVectorImpl<LRStartEndInfo> &LRPosInfo, MLModelRunner *RegallocRunner,
    function_ref<int(SlotIndex)> GetOpcode,
    function_ref<float(SlotIndex)> GetMBBFreq,
    function_ref<MachineBasicBlock *(SlotIndex)> GetMBBReference,
    const int InstructionsIndex, const int InstructionsMappingIndex,
    const int MBBFreqIndex, const int MBBMappingIndex,
    const SlotIndex LastIndex) { … }

void llvm::extractMBBFrequency(
    const SlotIndex CurrentIndex, const size_t CurrentInstructionIndex,
    std::map<MachineBasicBlock *, size_t> &VisitedMBBs,
    function_ref<float(SlotIndex)> GetMBBFreq,
    MachineBasicBlock *CurrentMBBReference, MLModelRunner *RegallocRunner,
    const int MBBFreqIndex, const int MBBMappingIndex) { … }

// Development mode-specific implementations
#ifdef LLVM_HAVE_TFLITE

RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
  return new DevelopmentModeEvictionAdvisorAnalysis();
}

int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
    const LiveInterval &VirtReg, const AllocationOrder &Order,
    unsigned OrderLimit, uint8_t CostPerUseLimit,
    const SmallVirtRegSet &FixedRegisters) const {
  int64_t Ret = 0;
  if (isa<ModelUnderTrainingRunner>(getRunner())) {
    Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
        VirtReg, Order, OrderLimit, CostPerUseLimit, FixedRegisters);
  } else {
    MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate(
        VirtReg, Order, CostPerUseLimit, FixedRegisters);
    // Find the index of the selected PhysReg. We need it for logging,
    // otherwise this is wasted cycles (but so would starting development mode
    // without a model nor logging)
    if (!PhysReg)
      Ret = CandidateVirtRegPos;
    else
      for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit);
           I != E; ++I, ++Ret)
        if (*I == PhysReg)
          break;
  }
  if (TrainingLog.empty())
    return Ret;
  // TODO(mtrofin): when we support optional rewards, this can go away. In the
  // meantime, we log the "pretend" reward (0) for the previous observation
  // before starting a new one.
  if (Log->hasObservationInProgress())
    Log->logReward<float>(0.0);

  Log->startObservation();
  size_t CurrentFeature = 0;
  size_t FeatureCount = EnableDevelopmentFeatures
                            ? FeatureIDs::FeaturesWithDevelopmentCount
                            : FeatureIDs::FeatureCount;
  for (; CurrentFeature < FeatureCount; ++CurrentFeature) {
    Log->logTensorValue(CurrentFeature,
                        reinterpret_cast<const char *>(
                            getRunner().getTensorUntyped(CurrentFeature)));
  }
  if (auto *MUTR = dyn_cast<ModelUnderTrainingRunner>(&getRunner()))
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size();
         ++I, ++CurrentFeature)
      Log->logTensorValue(
          CurrentFeature,
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I)));
  // The output is right after the features and the extra outputs
  Log->logTensorValue(CurrentFeature, reinterpret_cast<const char *>(&Ret));
  Log->endObservation();
  return Ret;
}

bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
  std::optional<float> CachedReward;
  auto GetReward = [&]() {
    if (!CachedReward)
      CachedReward = static_cast<float>(
          calculateRegAllocScore(
              MF, getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI())
              .getScore());
    return *CachedReward;
  };

  getAnalysis<RegAllocEvictionAdvisorAnalysis>().logRewardIfNeeded(MF,
                                                                   GetReward);
  getAnalysis<RegAllocPriorityAdvisorAnalysis>().logRewardIfNeeded(MF,
                                                                   GetReward);
  return false;
}
#endif // #ifdef LLVM_HAVE_TFLITE

RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() { … }

// In all cases except development mode, we don't need scoring.
#if !defined(LLVM_HAVE_TFLITE)
bool RegAllocScoring::runOnMachineFunction(MachineFunction &) { … }
#endif
llvm/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp