llvm/polly/lib/Transform/ScheduleOptimizer.cpp

//===- ScheduleOptimizer.cpp - Calculate an optimized schedule ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass generates an entirely new schedule tree from the data dependences
// and iteration domains. The new schedule tree is computed in two steps:
//
// 1) The isl scheduling optimizer is run
//
// The isl scheduling optimizer creates a new schedule tree that maximizes
// parallelism and tileability and minimizes data-dependence distances. The
// algorithm used is a modified version of the ``Pluto'' algorithm:
//
//   U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan.
//   A Practical Automatic Polyhedral Parallelizer and Locality Optimizer.
//   In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language
//   Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008.
//
// 2) A set of post-scheduling transformations is applied on the schedule tree.
//
// These optimizations include:
//
//  - Tiling of the innermost tilable bands
//  - Prevectorization - The choice of a possible outer loop that is strip-mined
//                       to the innermost level to enable inner-loop
//                       vectorization.
//  - Some optimizations for spatial locality are also planned.
//
// For a detailed description of the schedule tree itself please see section 6
// of:
//
// Polyhedral AST generation is more than scanning polyhedra
// Tobias Grosser, Sven Verdoolaege, Albert Cohen
// ACM Transactions on Programming Languages and Systems (TOPLAS),
// 37(4), July 2015
// http://www.grosser.es/#pub-polyhedral-AST-generation
//
// This publication also contains a detailed discussion of the different options
// for polyhedral loop unrolling, full/partial tile separation and other uses
// of the schedule tree.
//
//===----------------------------------------------------------------------===//

#include "polly/ScheduleOptimizer.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/DependenceInfo.h"
#include "polly/ManualOptimizer.h"
#include "polly/MatmulOptimizer.h"
#include "polly/Options.h"
#include "polly/ScheduleTreeTransform.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "isl/options.h"

usingnamespacellvm;
usingnamespacepolly;

namespace llvm {
class Loop;
class Module;
} // namespace llvm

#include "polly/Support/PollyDebug.h"
#define DEBUG_TYPE

static cl::opt<std::string>
    OptimizeDeps("polly-opt-optimize-only",
                 cl::desc("Only a certain kind of dependences (all/raw)"),
                 cl::Hidden, cl::init("all"), cl::cat(PollyCategory));

static cl::opt<std::string>
    SimplifyDeps("polly-opt-simplify-deps",
                 cl::desc("Dependences should be simplified (yes/no)"),
                 cl::Hidden, cl::init("yes"), cl::cat(PollyCategory));

static cl::opt<int> MaxConstantTerm(
    "polly-opt-max-constant-term",
    cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
    cl::init(20), cl::cat(PollyCategory));

static cl::opt<int> MaxCoefficient(
    "polly-opt-max-coefficient",
    cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
    cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string>
    MaximizeBandDepth("polly-opt-maximize-bands",
                      cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
                      cl::init("yes"), cl::cat(PollyCategory));

static cl::opt<int>
    ScheduleComputeOut("polly-schedule-computeout",
                       cl::desc("Bound the scheduler by maximal amount"
                                "of computational steps. "),
                       cl::Hidden, cl::init(300000), cl::ZeroOrMore,
                       cl::cat(PollyCategory));

static cl::opt<bool>
    GreedyFusion("polly-loopfusion-greedy",
                 cl::desc("Aggressively try to fuse everything"), cl::Hidden,
                 cl::cat(PollyCategory));

static cl::opt<std::string> OuterCoincidence(
    "polly-opt-outer-coincidence",
    cl::desc("Try to construct schedules where the outer member of each band "
             "satisfies the coincidence constraints (yes/no)"),
    cl::Hidden, cl::init("no"), cl::cat(PollyCategory));

static cl::opt<int> PrevectorWidth(
    "polly-prevect-width",
    cl::desc(
        "The number of loop iterations to strip-mine for pre-vectorization"),
    cl::Hidden, cl::init(4), cl::cat(PollyCategory));

static cl::opt<bool> FirstLevelTiling("polly-tiling",
                                      cl::desc("Enable loop tiling"),
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<int> FirstLevelDefaultTileSize(
    "polly-default-tile-size",
    cl::desc("The default tile size (if not enough were provided by"
             " --polly-tile-sizes)"),
    cl::Hidden, cl::init(32), cl::cat(PollyCategory));

static cl::list<int>
    FirstLevelTileSizes("polly-tile-sizes",
                        cl::desc("A tile size for each loop dimension, filled "
                                 "with --polly-default-tile-size"),
                        cl::Hidden, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::opt<bool>
    SecondLevelTiling("polly-2nd-level-tiling",
                      cl::desc("Enable a 2nd level loop of loop tiling"),
                      cl::cat(PollyCategory));

static cl::opt<int> SecondLevelDefaultTileSize(
    "polly-2nd-level-default-tile-size",
    cl::desc("The default 2nd-level tile size (if not enough were provided by"
             " --polly-2nd-level-tile-sizes)"),
    cl::Hidden, cl::init(16), cl::cat(PollyCategory));

static cl::list<int>
    SecondLevelTileSizes("polly-2nd-level-tile-sizes",
                         cl::desc("A tile size for each loop dimension, filled "
                                  "with --polly-default-tile-size"),
                         cl::Hidden, cl::CommaSeparated,
                         cl::cat(PollyCategory));

static cl::opt<bool> RegisterTiling("polly-register-tiling",
                                    cl::desc("Enable register tiling"),
                                    cl::cat(PollyCategory));

static cl::opt<int> RegisterDefaultTileSize(
    "polly-register-tiling-default-tile-size",
    cl::desc("The default register tile size (if not enough were provided by"
             " --polly-register-tile-sizes)"),
    cl::Hidden, cl::init(2), cl::cat(PollyCategory));

static cl::list<int>
    RegisterTileSizes("polly-register-tile-sizes",
                      cl::desc("A tile size for each loop dimension, filled "
                               "with --polly-register-tile-size"),
                      cl::Hidden, cl::CommaSeparated, cl::cat(PollyCategory));

static cl::opt<bool> PragmaBasedOpts(
    "polly-pragma-based-opts",
    cl::desc("Apply user-directed transformation from metadata"),
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> EnableReschedule("polly-reschedule",
                                      cl::desc("Optimize SCoPs using ISL"),
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    PMBasedOpts("polly-pattern-matching-based-opts",
                cl::desc("Perform optimizations based on pattern matching"),
                cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    EnablePostopts("polly-postopts",
                   cl::desc("Apply post-rescheduling optimizations such as "
                            "tiling (requires -polly-reschedule)"),
                   cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> OptimizedScops(
    "polly-optimized-scops",
    cl::desc("Polly - Dump polyhedral description of Scops optimized with "
             "the isl scheduling optimizer and the set of post-scheduling "
             "transformations is applied on the schedule tree"),
    cl::cat(PollyCategory));

STATISTIC(ScopsProcessed, "Number of scops processed");
STATISTIC(ScopsRescheduled, "Number of scops rescheduled");
STATISTIC(ScopsOptimized, "Number of scops optimized");

STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized");
STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized");

#define THREE_STATISTICS(VARNAME, DESC)

THREE_STATISTICS(NumBands, "Number of bands");
THREE_STATISTICS(NumBandMembers, "Number of band members");
THREE_STATISTICS(NumCoincident, "Number of coincident band members");
THREE_STATISTICS(NumPermutable, "Number of permutable bands");
THREE_STATISTICS(NumFilters, "Number of filter nodes");
THREE_STATISTICS(NumExtension, "Number of extension nodes");

STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
STATISTIC(RegisterTileOpts, "Number of register tiling applied");
STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
STATISTIC(MatMulOpts,
          "Number of matrix multiplication patterns detected and optimized");

namespace {
/// Additional parameters of the schedule optimizer.
///
/// Target Transform Info and the SCoP dependencies used by the schedule
/// optimizer.
struct OptimizerAdditionalInfoTy {};

class ScheduleTreeOptimizer final {};

isl::schedule_node
ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
                                               int VectorWidth) {}

struct InsertSimdMarkers final : ScheduleNodeRewriter<InsertSimdMarkers> {};

isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
    isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {}

static bool isSimpleInnermostBand(const isl::schedule_node &Node) {}

/// Check if this node is a band node, which has only one child.
///
/// @param Node The node to check.
static bool isOneTimeParentBandNode(isl::schedule_node Node) {}

bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) {}

bool ScheduleTreeOptimizer::isPMOptimizableBandNode(isl::schedule_node Node) {}

__isl_give isl::schedule_node
ScheduleTreeOptimizer::applyTileBandOpt(isl::schedule_node Node) {}

isl::schedule_node
ScheduleTreeOptimizer::applyPrevectBandOpt(isl::schedule_node Node) {}

__isl_give isl_schedule_node *
ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *NodeArg,
                                    void *User) {}

isl::schedule
ScheduleTreeOptimizer::optimizeSchedule(isl::schedule Schedule,
                                        const OptimizerAdditionalInfoTy *OAI) {}

isl::schedule_node ScheduleTreeOptimizer::optimizeScheduleNode(
    isl::schedule_node Node, const OptimizerAdditionalInfoTy *OAI) {}

bool ScheduleTreeOptimizer::isProfitableSchedule(Scop &S,
                                                 isl::schedule NewSchedule) {}

class IslScheduleOptimizerWrapperPass final : public ScopPass {};

char IslScheduleOptimizerWrapperPass::ID =;

#ifndef NDEBUG
static void printSchedule(llvm::raw_ostream &OS, const isl::schedule &Schedule,
                          StringRef Desc) {
  isl::ctx Ctx = Schedule.ctx();
  isl_printer *P = isl_printer_to_str(Ctx.get());
  P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
  P = isl_printer_print_schedule(P, Schedule.get());
  char *Str = isl_printer_get_str(P);
  OS << Desc << ": \n" << Str << "\n";
  free(Str);
  isl_printer_free(P);
}
#endif

/// Collect statistics for the schedule tree.
///
/// @param Schedule The schedule tree to analyze. If not a schedule tree it is
/// ignored.
/// @param Version  The version of the schedule tree that is analyzed.
///                 0 for the original schedule tree before any transformation.
///                 1 for the schedule tree after isl's rescheduling.
///                 2 for the schedule tree after optimizations are applied
///                 (tiling, pattern matching)
static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {}

static void runIslScheduleOptimizer(
    Scop &S,
    function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
    isl::schedule &LastSchedule, bool &DepsChanged) {}

bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {}

static void runScheduleOptimizerPrinter(raw_ostream &OS,
                                        isl::schedule LastSchedule) {}

void IslScheduleOptimizerWrapperPass::printScop(raw_ostream &OS, Scop &) const {}

void IslScheduleOptimizerWrapperPass::getAnalysisUsage(
    AnalysisUsage &AU) const {}

} // namespace

Pass *polly::createIslScheduleOptimizerWrapperPass() {}

INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
                      "Polly - Optimize schedule of SCoP", false, false);
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
                    "Polly - Optimize schedule of SCoP", false, false)

static llvm::PreservedAnalyses
runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM,
                                ScopStandardAnalysisResults &SAR, SPMUpdater &U,
                                raw_ostream *OS) {}

llvm::PreservedAnalyses
IslScheduleOptimizerPass::run(Scop &S, ScopAnalysisManager &SAM,
                              ScopStandardAnalysisResults &SAR, SPMUpdater &U) {}

llvm::PreservedAnalyses
IslScheduleOptimizerPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
                                     ScopStandardAnalysisResults &SAR,
                                     SPMUpdater &U) {}

//===----------------------------------------------------------------------===//

namespace {
/// Print result from IslScheduleOptimizerWrapperPass.
class IslScheduleOptimizerPrinterLegacyPass final : public ScopPass {};

char IslScheduleOptimizerPrinterLegacyPass::ID =;
} // namespace

Pass *polly::createIslScheduleOptimizerPrinterLegacyPass(raw_ostream &OS) {}

INITIALIZE_PASS_BEGIN(IslScheduleOptimizerPrinterLegacyPass,
                      "polly-print-opt-isl",
                      "Polly - Print optimizer schedule of SCoP", false, false);
INITIALIZE_PASS_DEPENDENCY(IslScheduleOptimizerWrapperPass)
INITIALIZE_PASS_END(IslScheduleOptimizerPrinterLegacyPass,
                    "polly-print-opt-isl",
                    "Polly - Print optimizer schedule of SCoP", false, false)