//===- Utils.cpp ---- Misc utilities for analysis -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements miscellaneous analysis routines for non-loop IR // structures. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/Analysis/Utils.h" #include "mlir/Analysis/Presburger/PresburgerRelation.h" #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/IntegerSet.h" #include "mlir/Interfaces/CallInterfaces.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <optional> #define DEBUG_TYPE … usingnamespacemlir; usingnamespaceaffine; usingnamespacepresburger; SmallDenseMap; Node; // LoopNestStateCollector walks loop nests and collects load and store // operations, and whether or not a region holding op other than ForOp and IfOp // was encountered in the loop nest. void LoopNestStateCollector::collect(Operation *opToWalk) { … } // Returns the load op count for 'memref'. unsigned Node::getLoadOpCount(Value memref) const { … } // Returns the store op count for 'memref'. unsigned Node::getStoreOpCount(Value memref) const { … } // Returns all store ops in 'storeOps' which access 'memref'. void Node::getStoreOpsForMemref(Value memref, SmallVectorImpl<Operation *> *storeOps) const { … } // Returns all load ops in 'loadOps' which access 'memref'. void Node::getLoadOpsForMemref(Value memref, SmallVectorImpl<Operation *> *loadOps) const { … } // Returns all memrefs in 'loadAndStoreMemrefSet' for which this node // has at least one load and store operation. void Node::getLoadAndStoreMemrefSet( DenseSet<Value> *loadAndStoreMemrefSet) const { … } // Initializes the data dependence graph by walking operations in `block`. // Assigns each node in the graph a node id based on program order in 'f'. bool MemRefDependenceGraph::init() { … } // Returns the graph node for 'id'. Node *MemRefDependenceGraph::getNode(unsigned id) { … } // Returns the graph node for 'forOp'. Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) { … } // Adds a node with 'op' to the graph and returns its unique identifier. unsigned MemRefDependenceGraph::addNode(Operation *op) { … } // Remove node 'id' (and its associated edges) from graph. void MemRefDependenceGraph::removeNode(unsigned id) { … } // Returns true if node 'id' writes to any memref which escapes (or is an // argument to) the block. Returns false otherwise. bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) { … } // Returns true iff there is an edge from node 'srcId' to node 'dstId' which // is for 'value' if non-null, or for any value otherwise. Returns false // otherwise. bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId, Value value) { … } // Adds an edge from node 'srcId' to node 'dstId' for 'value'. void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId, Value value) { … } // Removes an edge from node 'srcId' to node 'dstId' for 'value'. void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId, Value value) { … } // Returns true if there is a path in the dependence graph from node 'srcId' // to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the // operations that the edges connected are expected to be from the same block. bool MemRefDependenceGraph::hasDependencePath(unsigned srcId, unsigned dstId) { … } // Returns the input edge count for node 'id' and 'memref' from src nodes // which access 'memref' with a store operation. unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id, Value memref) { … } // Returns the output edge count for node 'id' and 'memref' (if non-null), // otherwise returns the total output edge count from node 'id'. unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id, Value memref) { … } /// Return all nodes which define SSA values used in node 'id'. void MemRefDependenceGraph::gatherDefiningNodes( unsigned id, DenseSet<unsigned> &definingNodes) { … } // Computes and returns an insertion point operation, before which the // the fused <srcId, dstId> loop nest can be inserted while preserving // dependences. Returns nullptr if no such insertion point is found. Operation * MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) { … } // Updates edge mappings from node 'srcId' to node 'dstId' after fusing them, // taking into account that: // *) if 'removeSrcId' is true, 'srcId' will be removed after fusion, // *) memrefs in 'privateMemRefs' has been replaced in node at 'dstId' by a // private memref. void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId, const DenseSet<Value> &privateMemRefs, bool removeSrcId) { … } // Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion // of sibling node 'sibId' into node 'dstId'. void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) { … } // Adds ops in 'loads' and 'stores' to node at 'id'. void MemRefDependenceGraph::addToNode( unsigned id, const SmallVectorImpl<Operation *> &loads, const SmallVectorImpl<Operation *> &stores) { … } void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) { … } // Calls 'callback' for each input edge incident to node 'id' which carries a // memref dependence. void MemRefDependenceGraph::forEachMemRefInputEdge( unsigned id, const std::function<void(Edge)> &callback) { … } // Calls 'callback' for each output edge from node 'id' which carries a // memref dependence. void MemRefDependenceGraph::forEachMemRefOutputEdge( unsigned id, const std::function<void(Edge)> &callback) { … } // Calls 'callback' for each edge in 'edges' which carries a memref // dependence. void MemRefDependenceGraph::forEachMemRefEdge( ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) { … } void MemRefDependenceGraph::print(raw_ostream &os) const { … } void mlir::affine::getAffineForIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops) { … } void mlir::affine::getEnclosingAffineOps(Operation &op, SmallVectorImpl<Operation *> *ops) { … } // Populates 'cst' with FlatAffineValueConstraints which represent original // domain of the loop bounds that define 'ivs'. LogicalResult ComputationSliceState::getSourceAsConstraints( FlatAffineValueConstraints &cst) const { … } // Populates 'cst' with FlatAffineValueConstraints which represent slice bounds. LogicalResult ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const { … } // Clears state bounds and operand state. void ComputationSliceState::clearBounds() { … } void ComputationSliceState::dump() const { … } /// Fast check to determine if the computation slice is maximal. Returns true if /// each slice dimension maps to an existing dst dimension and both the src /// and the dst loops for those dimensions have the same bounds. Returns false /// if both the src and the dst loops don't have the same bounds. Returns /// std::nullopt if none of the above can be proven. std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const { … } /// Returns true if it is deterministically verified that the original iteration /// space of the slice is contained within the new iteration space that is /// created after fusing 'this' slice into its destination. std::optional<bool> ComputationSliceState::isSliceValid() const { … } /// Returns true if the computation slice encloses all the iterations of the /// sliced loop nest. Returns false if it does not. Returns std::nullopt if it /// cannot determine if the slice is maximal or not. std::optional<bool> ComputationSliceState::isMaximal() const { … } unsigned MemRefRegion::getRank() const { … } std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape( SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs, SmallVectorImpl<int64_t> *lbDivisors) const { … } void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap, AffineMap &ubMap) const { … } LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { … } /// Computes the memory region accessed by this memref with the region /// represented as constraints symbolic/parametric in 'loopDepth' loops /// surrounding opInst and any additional Function symbols. // For example, the memref region for this load operation at loopDepth = 1 will // be as below: // // affine.for %i = 0 to 32 { // affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } // // region: {memref = %A, write = false, {%i <= m0 <= %i + 7} } // The last field is a 2-d FlatAffineValueConstraints symbolic in %i. // // TODO: extend this to any other memref dereferencing ops // (dma_start, dma_wait). LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState, bool addMemRefDimBounds) { … } std::optional<int64_t> mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) { … } // Returns the size of the region. std::optional<int64_t> MemRefRegion::getRegionSize() { … } /// Returns the size of memref data in bytes if it's statically shaped, /// std::nullopt otherwise. If the element of the memref has vector type, takes /// into account size of the vector as well. // TODO: improve/complete this when we have target data. std::optional<uint64_t> mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) { … } template <typename LoadOrStoreOp> LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, bool emitError) { … } // Explicitly instantiate the template so that the compiler knows we need them! template LogicalResult mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp, bool emitError); template LogicalResult mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp, bool emitError); // Returns in 'positions' the Block positions of 'op' in each ancestor // Block from the Block containing operation, stopping at 'limitBlock'. static void findInstPosition(Operation *op, Block *limitBlock, SmallVectorImpl<unsigned> *positions) { … } // Returns the Operation in a possibly nested set of Blocks, where the // position of the operation is represented by 'positions', which has a // Block position for each level of nesting. static Operation *getInstAtPosition(ArrayRef<unsigned> positions, unsigned level, Block *block) { … } // Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'. static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs, FlatAffineValueConstraints *cst) { … } /// Returns the innermost common loop depth for the set of operations in 'ops'. // TODO: Move this to LoopUtils. unsigned mlir::affine::getInnermostCommonLoopDepth( ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) { … } /// Computes in 'sliceUnion' the union of all slice bounds computed at /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and /// then verifies if it is valid. Returns 'SliceComputationResult::Success' if /// union was computed correctly, an appropriate failure otherwise. SliceComputationResult mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA, ArrayRef<Operation *> opsB, unsigned loopDepth, unsigned numCommonLoops, bool isBackwardSlice, ComputationSliceState *sliceUnion) { … } // TODO: extend this to handle multiple result maps. static std::optional<uint64_t> getConstDifference(AffineMap lbMap, AffineMap ubMap) { … } // Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop // nest surrounding represented by slice loop bounds in 'slice'. Returns true // on success, false otherwise (if a non-constant trip count was encountered). // TODO: Make this work with non-unit step loops. bool mlir::affine::buildSliceTripCountMap( const ComputationSliceState &slice, llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) { … } // Return the number of iterations in the given slice. uint64_t mlir::affine::getSliceIterationCount( const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) { … } const char *const kSliceFusionBarrierAttrName = …; // Computes slice bounds by projecting out any loop IVs from // 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice // bounds in 'sliceState' which represent the one loop nest's IVs in terms of // the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice'). void mlir::affine::getComputationSliceState( Operation *depSourceOp, Operation *depSinkOp, FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth, bool isBackwardSlice, ComputationSliceState *sliceState) { … } /// Creates a computation slice of the loop nest surrounding 'srcOpInst', /// updates the slice loop bounds with any non-null bound maps specified in /// 'sliceState', and inserts this slice into the loop nest surrounding /// 'dstOpInst' at loop depth 'dstLoopDepth'. // TODO: extend the slicing utility to compute slices that // aren't necessarily a one-to-one relation b/w the source and destination. The // relation between the source and destination could be many-to-many in general. // TODO: the slice computation is incorrect in the cases // where the dependence from the source to the destination does not cover the // entire destination index set. Subtract out the dependent destination // iterations from destination index set and check for emptiness --- this is one // solution. AffineForOp mlir::affine::insertBackwardComputationSlice( Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth, ComputationSliceState *sliceState) { … } // Constructs MemRefAccess populating it with the memref, its indices and // opinst from 'loadOrStoreOpInst'. MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) { … } unsigned MemRefAccess::getRank() const { … } bool MemRefAccess::isStore() const { … } /// Returns the nesting depth of this statement, i.e., the number of loops /// surrounding this statement. unsigned mlir::affine::getNestingDepth(Operation *op) { … } /// Equal if both affine accesses are provably equivalent (at compile /// time) when considering the memref, the affine maps and their respective /// operands. The equality of access functions + operands is checked by /// subtracting fully composed value maps, and then simplifying the difference /// using the expression flattener. /// TODO: this does not account for aliasing of memrefs. bool MemRefAccess::operator==(const MemRefAccess &rhs) const { … } void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) { … } /// Returns the number of surrounding loops common to 'loopsA' and 'loopsB', /// where each lists loops from outer-most to inner-most in loop nest. unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a, Operation &b) { … } static std::optional<int64_t> getMemoryFootprintBytes(Block &block, Block::iterator start, Block::iterator end, int memorySpace) { … } std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp, int memorySpace) { … } /// Returns whether a loop is parallel and contains a reduction loop. bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) { … } /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted /// at 'forOp'. void mlir::affine::getSequentialLoops( AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) { … } IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) { … } static void unpackOptionalValues(ArrayRef<std::optional<Value>> source, SmallVector<Value> &target) { … } /// Bound an identifier `pos` in a given FlatAffineValueConstraints with /// constraints drawn from an affine map. Before adding the constraint, the /// dimensions/symbols of the affine map are aligned with `constraints`. /// `operands` are the SSA Value operands used with the affine map. /// Note: This function adds a new symbol column to the `constraints` for each /// dimension/symbol that exists in the affine map but not in `constraints`. static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, BoundType type, unsigned pos, AffineMap map, ValueRange operands) { … } /// Add `val` to each result of `map`. static AffineMap addConstToResults(AffineMap map, int64_t val) { … } // Attempt to simplify the given min/max operation by proving that its value is // bounded by the same lower and upper bound. // // Bounds are computed by FlatAffineValueConstraints. Invariants required for // finding/proving bounds should be supplied via `constraints`. // // 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`). // 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in // case of `!isMin`) and bind it to `opBound`. SSA values that are used in // `op` but are not part of `constraints`, are added as extra symbols. // 3. For each result of `op`: Add result as a dimension `r_i`. Prove that: // * If `isMin`: r_i >= opBound // * If `isMax`: r_i <= opBound // If this is the case, ub(op) == lb(op). // 4. Replace `op` with `opBound`. // // In summary, the following constraints are added throughout this function. // Note: `invar` are dimensions added by the caller to express the invariants. // (Showing only the case where `isMin`.) // // invar | op | opBound | r_i | extra syms... | const | eq/ineq // ------+-------+---------+-----+---------------+-------+------------------- // (various eq./ineq. constraining `invar`, added by the caller) // ... | 0 | 0 | 0 | 0 | ... | ... // ------+-------+---------+-----+---------------+-------+------------------- // (various ineq. constraining `op` in terms of `op` operands (`invar` and // extra `op` operands "extra syms" that are not in `invar`)). // ... | -1 | 0 | 0 | ... | ... | >= 0 // ------+-------+---------+-----+---------------+-------+------------------- // (set `opBound` to `op` upper bound in terms of `invar` and "extra syms") // ... | 0 | -1 | 0 | ... | ... | = 0 // ------+-------+---------+-----+---------------+-------+------------------- // (for each `op` map result r_i: set r_i to corresponding map result, // prove that r_i >= minOpUb via contradiction) // ... | 0 | 0 | -1 | ... | ... | = 0 // 0 | 0 | 1 | -1 | 0 | -1 | >= 0 // FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp( Operation *op, FlatAffineValueConstraints constraints) { … }