llvm/llvm/lib/Transforms/IPO/FunctionImport.cpp

//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements Function import based on summaries.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/ProfileData/PGOCtxProfReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <memory>
#include <string>
#include <system_error>
#include <tuple>
#include <utility>

usingnamespacellvm;

#define DEBUG_TYPE

STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");

/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import functions with less than N instructions"));

static cl::opt<int> ImportCutoff(
    "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
    cl::desc("Only import first N functions if N>=0 (default -1)"));

static cl::opt<bool>
    ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                   cl::desc("Import functions with noinline attribute"));

static cl::opt<float>
    ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
                      cl::Hidden, cl::value_desc("x"),
                      cl::desc("As we import functions, multiply the "
                               "`import-instr-limit` threshold by this factor "
                               "before processing newly imported functions"));

static cl::opt<float> ImportHotInstrFactor(
    "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc("As we import functions called from hot callsite, multiply the "
             "`import-instr-limit` threshold by this factor "
             "before processing newly imported functions"));

static cl::opt<float> ImportHotMultiplier(
    "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
    cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));

static cl::opt<float> ImportCriticalMultiplier(
    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
    cl::value_desc("x"),
    cl::desc(
        "Multiply the `import-instr-limit` threshold for critical callsites"));

// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
    "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
    cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));

static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
                                  cl::desc("Print imported functions"));

static cl::opt<bool> PrintImportFailures(
    "print-import-failures", cl::init(false), cl::Hidden,
    cl::desc("Print information for functions rejected for importing"));

static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
                                 cl::desc("Compute dead symbols"));

static cl::opt<bool> EnableImportMetadata(
    "enable-import-metadata", cl::init(false), cl::Hidden,
    cl::desc("Enable import metadata like 'thinlto_src_module' and "
             "'thinlto_src_file'"));

/// Summary file to use for function importing when using -function-import from
/// the command line.
static cl::opt<std::string>
    SummaryFile("summary-file",
                cl::desc("The summary file to use for function importing."));

/// Used when testing importing from distributed indexes via opt
// -function-import.
static cl::opt<bool>
    ImportAllIndex("import-all-index",
                   cl::desc("Import all external functions in index."));

/// This is a test-only option.
/// If this option is enabled, the ThinLTO indexing step will import each
/// function declaration as a fallback. In a real build this may increase ram
/// usage of the indexing step unnecessarily.
/// TODO: Implement selective import (based on combined summary analysis) to
/// ensure the imported function has a use case in the postlink pipeline.
static cl::opt<bool> ImportDeclaration(
    "import-declaration", cl::init(false), cl::Hidden,
    cl::desc("If true, import function declaration as fallback if the function "
             "definition is not imported."));

/// Pass a workload description file - an example of workload would be the
/// functions executed to satisfy a RPC request. A workload is defined by a root
/// function and the list of functions that are (frequently) needed to satisfy
/// it. The module that defines the root will have all those functions imported.
/// The file contains a JSON dictionary. The keys are root functions, the values
/// are lists of functions to import in the module defining the root. It is
/// assumed -funique-internal-linkage-names was used, thus ensuring function
/// names are unique even for local linkage ones.
static cl::opt<std::string> WorkloadDefinitions(
    "thinlto-workload-def",
    cl::desc("Pass a workload definition. This is a file containing a JSON "
             "dictionary. The keys are root functions, the values are lists of "
             "functions to import in the module defining the root. It is "
             "assumed -funique-internal-linkage-names was used, to ensure "
             "local linkage functions have unique names. For example: \n"
             "{\n"
             "  \"rootFunction_1\": [\"function_to_import_1\", "
             "\"function_to_import_2\"], \n"
             "  \"rootFunction_2\": [\"function_to_import_3\", "
             "\"function_to_import_4\"] \n"
             "}"),
    cl::Hidden);

extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}

// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
                                        LLVMContext &Context) {}

static bool shouldSkipLocalInAnotherModule(const GlobalValueSummary *RefSummary,
                                           size_t NumDefs,
                                           StringRef ImporterModule) {}

/// Given a list of possible callee implementation for a call site, qualify the
/// legality of importing each. The return is a range of pairs. Each pair
/// corresponds to a candidate. The first value is the ImportFailureReason for
/// that candidate, the second is the candidate.
static auto qualifyCalleeCandidates(
    const ModuleSummaryIndex &Index,
    ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
    StringRef CallerModulePath) {}

/// Given a list of possible callee implementation for a call site, select one
/// that fits the \p Threshold for function definition import. If none are
/// found, the Reason will give the last reason for the failure (last, in the
/// order of CalleeSummaryList entries). While looking for a callee definition,
/// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
/// candidate; other modules may want to know the function summary or
/// declaration even if a definition is not needed.
///
/// FIXME: select "best" instead of first that fits. But what is "best"?
/// - The smallest: more likely to be inlined.
/// - The one with the least outgoing edges (already well optimized).
/// - One from a module already being imported from in order to reduce the
///   number of source modules parsed/linked.
/// - One that has PGO data attached.
/// - [insert you fancy metric here]
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
             ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
             unsigned Threshold, StringRef CallerModulePath,
             const GlobalValueSummary *&TooLargeOrNoInlineSummary,
             FunctionImporter::ImportFailureReason &Reason) {}

namespace {

EdgeInfo;

} // anonymous namespace

FunctionImporter::ImportMapTy::AddDefinitionStatus
FunctionImporter::ImportMapTy::addDefinition(StringRef FromModule,
                                             GlobalValue::GUID GUID) {}

void FunctionImporter::ImportMapTy::maybeAddDeclaration(
    StringRef FromModule, GlobalValue::GUID GUID) {}

SmallVector<StringRef, 0>
FunctionImporter::ImportMapTy::getSourceModules() const {}

std::optional<GlobalValueSummary::ImportKind>
FunctionImporter::ImportMapTy::getImportType(StringRef FromModule,
                                             GlobalValue::GUID GUID) const {}

/// Import globals referenced by a function or other globals that are being
/// imported, if importing such global is possible.
class GlobalsImporter final {};

static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);

/// Determine the list of imports and exports for each module.
class ModuleImportsManager {};

/// A ModuleImportsManager that operates based on a workload definition (see
/// -thinlto-workload-def). For modules that do not define workload roots, it
/// applies the base ModuleImportsManager import policy.
class WorkloadImportsManager : public ModuleImportsManager {};

std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing,
    const ModuleSummaryIndex &Index,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {}

static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason) {}

/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
static void computeImportForFunction(
    const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
    const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
    FunctionImporter::ImportMapTy &ImportList,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
    FunctionImporter::ImportThresholdsTy &ImportThresholds) {}

void ModuleImportsManager::computeImportForModule(
    const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
    FunctionImporter::ImportMapTy &ImportList) {}

#ifndef NDEBUG
static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
  auto SL = VI.getSummaryList();
  return SL.empty()
             ? false
             : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
}

static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
                               GlobalValue::GUID G) {
  if (const auto &VI = Index.getValueInfo(G))
    return isGlobalVarSummary(Index, VI);
  return false;
}

// Return the number of global variable summaries in ExportSet.
static unsigned
numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                      FunctionImporter::ExportSetTy &ExportSet) {
  unsigned NumGVS = 0;
  for (auto &VI : ExportSet)
    if (isGlobalVarSummary(Index, VI.getGUID()))
      ++NumGVS;
  return NumGVS;
}

struct ImportStatistics {
  unsigned NumGVS = 0;
  unsigned DefinedFS = 0;
  unsigned Count = 0;
};

// Compute import statistics for each source module in ImportList.
static DenseMap<StringRef, ImportStatistics>
collectImportStatistics(const ModuleSummaryIndex &Index,
                        const FunctionImporter::ImportMapTy &ImportList) {
  DenseMap<StringRef, ImportStatistics> Histogram;

  for (const auto &[FromModule, GUID, Type] : ImportList) {
    ImportStatistics &Entry = Histogram[FromModule];
    ++Entry.Count;
    if (isGlobalVarSummary(Index, GUID))
      ++Entry.NumGVS;
    else if (Type == GlobalValueSummary::Definition)
      ++Entry.DefinedFS;
  }
  return Histogram;
}
#endif

#ifndef NDEBUG
static bool checkVariableImport(
    const ModuleSummaryIndex &Index,
    FunctionImporter::ImportListsTy &ImportLists,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
  DenseSet<GlobalValue::GUID> FlattenedImports;

  for (const auto &ImportPerModule : ImportLists)
    for (const auto &[FromModule, GUID, ImportType] : ImportPerModule.second)
      FlattenedImports.insert(GUID);

  // Checks that all GUIDs of read/writeonly vars we see in export lists
  // are also in the import lists. Otherwise we my face linker undefs,
  // because readonly and writeonly vars are internalized in their
  // source modules. The exception would be if it has a linkage type indicating
  // that there may have been a copy existing in the importing module (e.g.
  // linkonce_odr). In that case we cannot accurately do this checking.
  auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
                                                  const ValueInfo &VI) {
    auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
        Index.findSummaryInModule(VI, ModulePath));
    return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
           !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
             GVS->linkage() == GlobalValue::WeakODRLinkage ||
             GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
  };

  for (auto &ExportPerModule : ExportLists)
    for (auto &VI : ExportPerModule.second)
      if (!FlattenedImports.count(VI.getGUID()) &&
          IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
        return false;

  return true;
}
#endif

/// Compute all the import and export for every module using the Index.
void llvm::ComputeCrossModuleImport(
    const ModuleSummaryIndex &Index,
    const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    FunctionImporter::ImportListsTy &ImportLists,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {}

#ifndef NDEBUG
static void dumpImportListForModule(const ModuleSummaryIndex &Index,
                                    StringRef ModulePath,
                                    FunctionImporter::ImportMapTy &ImportList) {
  DenseMap<StringRef, ImportStatistics> Histogram =
      collectImportStatistics(Index, ImportList);
  LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
                    << Histogram.size() << " modules.\n");
  for (const auto &[SrcModName, Stats] : Histogram) {
    LLVM_DEBUG(dbgs() << " - " << Stats.DefinedFS
                      << " function definitions and "
                      << Stats.Count - Stats.DefinedFS - Stats.NumGVS
                      << " function declarations imported from " << SrcModName
                      << "\n");
    LLVM_DEBUG(dbgs() << " - " << Stats.NumGVS << " vars imported from "
                      << SrcModName << "\n");
  }
}
#endif

/// Compute all the imports for the given module using the Index.
///
/// \p isPrevailing is a callback that will be called with a global value's GUID
/// and summary and should return whether the module corresponding to the
/// summary contains the linker-prevailing copy of that value.
///
/// \p ImportList will be populated with a map that can be passed to
/// FunctionImporter::importFunctions() above (see description there).
static void ComputeCrossModuleImportForModuleForTest(
    StringRef ModulePath,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    const ModuleSummaryIndex &Index,
    FunctionImporter::ImportMapTy &ImportList) {}

/// Mark all external summaries in \p Index for import into the given module.
/// Used for testing the case of distributed builds using a distributed index.
///
/// \p ImportList will be populated with a map that can be passed to
/// FunctionImporter::importFunctions() above (see description there).
static void ComputeCrossModuleImportForModuleFromIndexForTest(
    StringRef ModulePath, const ModuleSummaryIndex &Index,
    FunctionImporter::ImportMapTy &ImportList) {}

// For SamplePGO, the indirect call targets for local functions will
// have its original name annotated in profile. We try to find the
// corresponding PGOFuncName as the GUID, and fix up the edges
// accordingly.
void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
                                     FunctionSummary *FS) {}

void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {}

void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
    ModuleSummaryIndex &Index,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
    function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {}

// Compute dead symbols and propagate constants in combined index.
void llvm::computeDeadSymbolsWithConstProp(
    ModuleSummaryIndex &Index,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
    function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
    bool ImportEnabled) {}

/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
void llvm::gatherImportedSummariesForModule(
    StringRef ModulePath,
    const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
    const FunctionImporter::ImportMapTy &ImportList,
    ModuleToSummariesForIndexTy &ModuleToSummariesForIndex,
    GVSummaryPtrSet &DecSummaries) {}

/// Emit the files \p ModulePath will import from into \p OutputFilename.
Error llvm::EmitImportsFiles(
    StringRef ModulePath, StringRef OutputFilename,
    const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex) {}

bool llvm::convertToDeclaration(GlobalValue &GV) {}

void llvm::thinLTOFinalizeInModule(Module &TheModule,
                                   const GVSummaryMapTy &DefinedGlobals,
                                   bool PropagateAttrs) {}

/// Run internalization on \p TheModule based on symmary analysis.
void llvm::thinLTOInternalizeModule(Module &TheModule,
                                    const GVSummaryMapTy &DefinedGlobals) {}

/// Make alias a clone of its aliasee.
static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {}

// Internalize values that we marked with specific attribute
// in processGlobalForThinLTO.
static void internalizeGVsAfterImport(Module &M) {}

// Automatically import functions in Module \p DestModule based on the summaries
// index.
Expected<bool> FunctionImporter::importFunctions(
    Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {}

static bool doImportingForModuleForTest(
    Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
                   isPrevailing) {}

PreservedAnalyses FunctionImportPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {}