//===- EliminateBarriers.cpp - Eliminate extra barriers --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Barrier elimination pattern and pass. If a barrier does not enforce any // conflicting pair of memory effects, including a pair that is enforced by // another barrier, it is unnecessary and can be removed. Adapted from // "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level // Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in // PPoPP 2023 and implementation in Polygeist. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" namespace mlir { #define GEN_PASS_DEF_GPUELIMINATEBARRIERS #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" } // namespace mlir usingnamespacemlir; usingnamespacemlir::gpu; #define DEBUG_TYPE … #define DEBUG_TYPE_ALIAS … #define DBGS() … #define DBGS_ALIAS() … // The functions below provide interface-like verification, but are too specific // to barrier elimination to become interfaces. /// Implement the MemoryEffectsOpInterface in the suitable way. static bool isKnownNoEffectsOpWithoutInterface(Operation *op) { … } /// Returns `true` if the op defines the parallel region that is subject to /// barrier synchronization. 
static bool isParallelRegionBoundary(Operation *op) { … } /// Returns `true` if the op behaves like a sequential loop, e.g., the control /// flow "wraps around" from the end of the body region back to its start. static bool isSequentialLoopLike(Operation *op) { … } /// Returns `true` if the regions of the op are guaranteed to be executed at /// most once. Thus, if an operation in one of the nested regions of `op` is /// executed, then so are all the other operations in this region. static bool hasSingleExecutionBody(Operation *op) { … } /// Returns `true` if the operation is known to produce a pointer-like object /// distinct from any other object produced by a similar operation. For example, /// an allocation produces such an object. static bool producesDistinctBase(Operation *op) { … } /// Populates `effects` with all memory effects without associating them to a /// specific value. static void addAllValuelessEffects( SmallVectorImpl<MemoryEffects::EffectInstance> &effects) { … } /// Collect the memory effects of the given op in 'effects'. Returns 'true' if /// it could extract the effect information from the op, otherwise returns /// 'false' and conservatively populates the list with all possible effects /// associated with no particular value or symbol. static bool collectEffects(Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects, bool ignoreBarriers = true) { … } /// Collects memory effects from operations that may be executed before `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is /// set. Returns `true` if the memory effects added to `effects` are exact, /// `false` if they are a conservative over-approximation. The latter means that /// `effects` contain instances not associated with a specific value. 
static bool getEffectsBefore(Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects, bool stopAtBarrier) { … } /// Collects memory effects from operations that may be executed after `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is /// set. Returns `true` if the memory effects added to `effects` are exact, /// `false` if they are a conservative over-approximation. The latter means that /// `effects` contain instances not associated with a specific value. static bool getEffectsAfter(Operation *op, SmallVectorImpl<MemoryEffects::EffectInstance> &effects, bool stopAtBarrier) { … } /// Looks through known "view-like" ops to find the base memref. static Value getBase(Value v) { … } /// Returns `true` if the value is defined as a function argument. static bool isFunctionArgument(Value v) { … } /// Returns the operand that the operation "propagates" through it for capture /// purposes. That is, if the value produced by this operation is captured, then /// so is the returned value. static Value propagatesCapture(Operation *op) { … } /// Returns `true` if the given operation is known to capture the given value, /// `false` if it is known not to capture the given value, `nullopt` if neither /// is known. static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) { … } /// Returns `true` if the value may be captured by any of its users, i.e., if /// the user may be storing this value into memory. This makes aliasing analysis /// more conservative as it cannot assume the pointer-like value is only passed /// around through SSA use-def. static bool maybeCaptured(Value v) { … } /// Returns true if two values may be referencing aliasing memory. This is a /// rather naive and conservative analysis. 
Values defined by different /// allocation-like operations as well as values derived from those by casts and /// views cannot alias each other. Similarly, values defined by allocations /// inside a function cannot alias function arguments. Global values cannot /// alias each other or local allocations. Values that are captured, i.e. /// themselves potentially stored in memory, are considered as aliasing with /// everything. This seems sufficient to achieve barrier removal in structured /// control flow, more complex cases would require a proper dataflow analysis. static bool mayAlias(Value first, Value second) { … } /// Returns `true` if the effect may be affecting memory aliasing the value. If /// the effect is not associated with any value, it is assumed to affect all /// memory and therefore aliases with everything. static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) { … } /// Returns `true` if the two effects may be affecting aliasing memory. If /// an effect is not associated with any value, it is assumed to affect all /// memory and therefore aliases with everything. Effects on different resources /// cannot alias. static bool mayAlias(MemoryEffects::EffectInstance a, MemoryEffects::EffectInstance b) { … } /// Returns `true` if any of the "before" effect instances has a conflict with /// any "after" instance for the purpose of barrier elimination. The effects are /// supposed to be limited to a barrier synchronization scope. A conflict exists /// if effect instances affect aliasing memory locations and at least one of /// them is a write. As an exception, if the non-write effect is an allocation /// effect, there is no conflict since we are only expected to see the /// allocation happening in the same thread and it cannot be accessed from /// another thread without capture (which we do handle in alias analysis). 
static bool haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects, ArrayRef<MemoryEffects::EffectInstance> afterEffects) { … } namespace { class BarrierElimination final : public OpRewritePattern<BarrierOp> { … }; class GpuEliminateBarriersPass : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> { … }; } // namespace void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) { … }