//===- MatmulOptimizer.h -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef POLLY_MATMULOPTIMIZER_H #define POLLY_MATMULOPTIMIZER_H #include "isl/isl-noexceptions.h" namespace llvm { class TargetTransformInfo; } namespace polly { class Dependences; /// Apply the BLIS matmul optimization pattern if possible. /// /// Make the loops containing the matrix multiplication be the innermost /// loops and apply the BLIS matmul optimization pattern. BLIS implements /// gemm as three nested loops around a macro-kernel, plus two packing /// routines. The macro-kernel is implemented in terms of two additional /// loops around a micro-kernel. The micro-kernel is a loop around a rank-1 /// (i.e., outer product) update. /// /// For a detailed description please see [1]. /// /// The order of the loops defines the data reused in the BLIS implementation /// of gemm ([1]). In particular, elements of the matrix B, the second /// operand of matrix multiplication, are reused between iterations of the /// innermost loop. To keep the reused data in cache, only elements of matrix /// A, the first operand of matrix multiplication, should be evicted during /// an iteration of the innermost loop. To provide such a cache replacement /// policy, elements of the matrix A can, in particular, be loaded first and, /// consequently, be least-recently-used. /// /// In our case matrices are stored in row-major order instead of /// column-major order used in the BLIS implementation ([1]). It affects only /// on the form of the BLIS micro kernel and the computation of its /// parameters. In particular, reused elements of the matrix B are /// successively multiplied by specific elements of the matrix A. /// /// Refs.: /// [1] - Analytical Modeling is Enough for High Performance BLIS /// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti /// Technical Report, 2014 /// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf /// /// @see ScheduleTreeOptimizer::createMicroKernel /// @see ScheduleTreeOptimizer::createMacroKernel /// @see getMicroKernelParams /// @see getMacroKernelParams /// /// TODO: Implement the packing transformation. /// /// @param Node The node that contains a band to be optimized. The node /// is required to successfully pass /// ScheduleTreeOptimizer::isMatrMultPattern. /// @param TTI Target Transform Info. /// @param D The dependencies. /// /// @returns The transformed schedule or nullptr if the optimization /// cannot be applied. isl::schedule_node tryOptimizeMatMulPattern(isl::schedule_node Node, const llvm::TargetTransformInfo *TTI, const Dependences *D); } // namespace polly #endif // POLLY_MATMULOPTIMIZER_H