// SparseTensorStorageLayout.h | Explore in Territory

//===- SparseTensorStorageLayout.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines utilities for the sparse memory layout.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORSTORAGELAYOUT_H_
#define MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORSTORAGELAYOUT_H_

#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"

namespace mlir {
namespace sparse_tensor {

///===----------------------------------------------------------------------===//
/// The sparse tensor storage scheme for a tensor is organized as a single
/// compound type with the following fields. Note that every memref with `?`
/// size actually behaves as a "vector", i.e. the stored size is the capacity
/// and the used size resides in the storage_specifier struct.
///
/// struct {
///   ; per-level l:
///   ;  if dense:
///        <nothing>
///   ;  if compressed:
///        memref<[batch] x ? x pos>  positions   ; positions for level l
///        memref<[batch] x ? x crd>  coordinates ; coordinates for level l
///   ;  if loose-compressed:
///        memref<[batch] x ? x pos>  positions   ; lo/hi pos pairs for level l
///        memref<[batch] x ? x crd>  coordinates ; coordinates for level l
///   ;  if singleton/2-out-of-4:
///        memref<[batch] x ? x crd>  coordinates ; coordinates for level l
///
///   memref<[batch] x ? x eltType> values        ; values
///
///   struct sparse_tensor.storage_specifier {
///     array<rank x int> lvlSizes    ; sizes/cardinalities for each level
///     // TODO: memSizes need to be expanded to array<[batch] x n x int> to
///     // support different sizes for different batches. At the moment, we
///     // assume that every batch occupies the same memory size.
///     array<n x int> memSizes       ; sizes/lengths for each data memref
///   }
/// };
///
/// In addition, for a "trailing COO region", defined as a compressed level
/// followed by one or more singleton levels, the default SOA storage that
/// is inherent to the TACO format is optimized into an AOS storage where
/// all coordinates of a stored element appear consecutively.  In such cases,
/// a special operation (sparse_tensor.coordinates_buffer) must be used to
/// access the AOS coordinates array. In the code below, the method
/// `getCOOStart` is used to find the start of the "trailing COO region".
///
/// If the sparse tensor is a slice (produced by the `tensor.extract_slice`
/// operation), instead of allocating a new sparse tensor for it, it reuses the
/// same set of MemRefs but attaches an additional set of slicing-metadata for
/// the per-dimension slice offset and stride.
///
/// Examples.
///
/// #CSR storage of 2-dim matrix yields
///  memref<?xindex>                           ; positions-1
///  memref<?xindex>                           ; coordinates-1
///  memref<?xf64>                             ; values
///  struct<(array<2 x i64>, array<3 x i64>)>  ; lvl0, lvl1, 3xsizes
///
/// #COO storage of 2-dim matrix yields
///  memref<?xindex>,                          ; positions-0, essentially [0,sz]
///  memref<?xindex>                           ; AOS coordinates storage
///  memref<?xf64>                             ; values
///  struct<(array<2 x i64>, array<3 x i64>)>  ; lvl0, lvl1, 3xsizes
///
/// Slice on #COO storage of 2-dim matrix yields
///  ;; Inherited from the original sparse tensors
///  memref<?xindex>,                          ; positions-0, essentially [0,sz]
///  memref<?xindex>                           ; AOS coordinates storage
///  memref<?xf64>                             ; values
///  struct<(array<2 x i64>, array<3 x i64>,   ; lvl0, lvl1, 3xsizes
///  ;; Extra slicing-metadata
///          array<2 x i64>, array<2 x i64>)>  ; dim offset, dim stride.
///
///===----------------------------------------------------------------------===//

/// Identifies the kind of a field in the sparse tensor storage scheme; the
/// enumeration is stored as a `uint32_t`.
/// NOTE(review): the enumerators are not visible here; presumably they mirror
/// the field categories of the storage scheme documented at the top of this
/// file -- confirm against the full definition.
enum class SparseTensorFieldKind : uint32_t { … };

/// Converts a `SparseTensorFieldKind` into a `StorageSpecifierKind`.
/// NOTE(review): the mapping itself is not visible here; verify which field
/// kinds have a specifier counterpart before relying on this for every kind.
inline StorageSpecifierKind toSpecifierKind(SparseTensorFieldKind kind) { … }

/// Converts a `StorageSpecifierKind` into a `SparseTensorFieldKind`, i.e. the
/// opposite direction of `toSpecifierKind`.
/// NOTE(review): the mapping itself is not visible here; verify that every
/// specifier kind has a field-kind counterpart.
inline SparseTensorFieldKind toFieldKind(StorageSpecifierKind kind) { … }

/// The type of field indices.  This alias is to help code be more
/// self-documenting; unfortunately it is not type-checked, so it only
/// provides documentation rather than doing anything to prevent mixups
/// (a `FieldIndex` converts freely to and from any other `unsigned`).
using FieldIndex = unsigned;

/// Provides methods to access fields of a sparse tensor with the given
/// encoding. See the storage scheme description at the top of this file for
/// the fields that make up a sparse tensor.
class StorageLayout { … };

//
// Wrapper functions to invoke StorageLayout-related methods.
//

/// Returns, as an `unsigned`, the number of fields for the encoding `enc`.
/// NOTE(review): presumably this counts every field of the storage scheme,
/// including the storage specifier -- confirm against the implementation.
inline unsigned getNumFieldsFromEncoding(SparseTensorEncodingAttr enc) { … }

/// Returns, as an `unsigned`, the number of data fields for the encoding
/// `enc`.
/// NOTE(review): presumably "data fields" are the memref fields (positions,
/// coordinates, values) and exclude the storage specifier -- confirm against
/// the implementation.
inline unsigned getNumDataFieldsFromEncoding(SparseTensorEncodingAttr enc) { … }

/// Takes an encoding `enc` and a `callback` whose arguments are a field index,
/// a field kind, a level, and a level type, and which returns a `bool`.
/// NOTE(review): presumably `callback` is invoked once per field of the
/// storage scheme and a `false` result stops the iteration early -- confirm
/// against the implementation. The `function_ref` is non-owning, so the
/// callable must stay alive for the duration of the call.
inline void foreachFieldInSparseTensor(
    SparseTensorEncodingAttr enc,
    llvm::function_ref<bool(FieldIndex, SparseTensorFieldKind, Level,
                            LevelType)>
        callback) { … }

/// Declaration only; the definition is not part of this header. Takes a
/// `SparseTensorType` and a callback whose arguments are a `Type`, a field
/// index, a field kind, a level, and a level type, and which returns a `bool`.
/// NOTE(review): presumably the `Type` argument is the type of the visited
/// field and a `false` result stops the iteration early -- confirm against
/// the definition.
void foreachFieldAndTypeInSparseTensor(
    SparseTensorType,
    llvm::function_ref<bool(Type, FieldIndex, SparseTensorFieldKind, Level,
                            LevelType)>);

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_DIALECT_SPARSETENSOR_IR_SPARSETENSORSTORAGELAYOUT_H_
// llvm/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h