llvm/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h

//===-- TBAABuilder.h -- TBAA builder declarations --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
#define FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H

#include "flang/Optimizer/Analysis/TBAAForest.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace fir {

// TBAA builder provides mapping between FIR types and their TBAA type
// descriptors, and methods to populate that mapping during FIR to LLVM
// type conversion and to attach llvm.tbaa attributes to memory access
// instructions.
//
// TBAA type information is represented with LLVM::MetadataOp operation
// with specific symbol name `TBAABuilder::tbaaMetaOpName`. The basic
// TBAA tree used for Flang consists of the following nodes:
//   llvm.metadata @__flang_tbaa {
//     llvm.tbaa_root @root_0 {id = "Flang Type TBAA Function Root funcName"}
//     llvm.tbaa_type_desc @type_desc_1 {id = "any access",
//                                       members = {<@root_0, 0>}}
//     llvm.tbaa_type_desc @type_desc_2 {id = "any data access",
//                                       members = {<@type_desc_1, 0>}}
//     llvm.tbaa_type_desc @type_desc_3 {id = "descriptor member",
//                                       members = {<@type_desc_1, 0>}}
//   }
//
// The `<any data access>` and `<descriptor member>` type descriptors
// are two sub-roots of the basic TBAA tree, and they allow representing
// box and non-box accesses, which can never alias in the current Flang
// implementation. The `<any access>` type descriptor is their common parent
// that can be used for representing accesses that may alias box and non-box
// accesses if an access cannot be classified strictly as box or non-box.
// In the current implementation `<any access>` is not used by TBAA access tags,
// because it is always known whether an operation accesses box or non-box.
//
// Given this basic TBAA tree structure, the box/descriptor types may
// be represented like this:
//   llvm.tbaa_type_desc @type_desc_4 {
//       id = "CFI_cdesc_t_dim0",
//       members = {<@type_desc_3, 0>, // base_addr
//                  <@type_desc_3, 8>, // elem_len
//                  <@type_desc_3, 16>, // version
//                  <@type_desc_3, 20>, // rank
//                  <@type_desc_3, 21>, // type
//                  <@type_desc_3, 22>, // attribute
//                  <@type_desc_3, 23>} // extra
//   }
//   llvm.tbaa_type_desc @type_desc_5 {
//       id = "CFI_cdesc_t_dim1",
//       members = {<@type_desc_3, 0>, // base_addr
//                  <@type_desc_3, 8>, // elem_len
//                  <@type_desc_3, 16>, // version
//                  <@type_desc_3, 20>, // rank
//                  <@type_desc_3, 21>, // type
//                  <@type_desc_3, 22>, // attribute
//                  <@type_desc_3, 23>, // extra
//                  <@type_desc_3, 24>, // dim[0].lower_bound
//                  <@type_desc_3, 32>, // dim[0].extent
//                  <@type_desc_3, 40>} // dim[0].sm
//   }
//   llvm.tbaa_type_desc @type_desc_6 {
//       id = "CFI_cdesc_t_dim2",
//       members = {<@type_desc_3, 0>, // base_addr
//                  <@type_desc_3, 8>, // elem_len
//                  <@type_desc_3, 16>, // version
//                  <@type_desc_3, 20>, // rank
//                  <@type_desc_3, 21>, // type
//                  <@type_desc_3, 22>, // attribute
//                  <@type_desc_3, 23>, // extra
//                  <@type_desc_3, 24>, // dim[0].lower_bound
//                  <@type_desc_3, 32>, // dim[0].extent
//                  <@type_desc_3, 40>, // dim[0].sm
//                  <@type_desc_3, 48>, // dim[1].lower_bound
//                  <@type_desc_3, 56>, // dim[1].extent
//                  <@type_desc_3, 64>} // dim[1].sm
//   }
// etc.
//
// Note that the TBAA type descriptors cannot represent array members
// of structures, so the `dim` array in the descriptor structure
// has to be represented as a linear set of members.
//
// We can use the same TBAA type descriptor for all members of the F18
// descriptor structure, because the actual accesses of the F18 descriptor
// members will be disambiguated based on their offset off the beginning
// of the descriptor. Thus, all members have the same `<descriptor member>`
// type in the TBAA graph.
//
// The TBAA type descriptors have to be created during FIR to LLVM type
// conversion, so fir::LLVMTypeConverter has to provide the member offsets
// to TBAABuilder - the offsets must be computed based on the LLVM type
// to which the FIR type is being converted.
//
// TBAABuilder keeps a map between the FIR type and its TBAA type descriptor.
// The map is used when a TBAA tag needs to be attached to a memory accessing
// operation given the FIR types identifying the access's base and access type
// and the offset within the base type, e.g. an access of one dimensional
// descriptor's `base_addr` member may be defined by:
//   * base FIR type: !fir.box<!fir.array<?xf32>> - the resulting
//     access tag will use `<CFI_cdesc_t_dim1>` type descriptor for the base
//     type.
//   * access FIR type: <undefined> - all accesses within descriptors
//     are always represented with `<descriptor member>` type descriptor.
//   * offset:
//       llvm.getelementptr %arg0[0, 0] :
//           (!llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8,
//                              array<1 x array<3 x i64>>)>>) ->
//           !llvm.ptr<ptr<f32>>
//     The offset is computed based on the LLVM::GEPOp's indices and the LLVM
//     type layout.
//
// Detailed representation of the layout of the F18 descriptors is required
// to disambiguate accesses of the different members of the descriptors,
// e.g. a read of `base_addr` member (of one box) can never alias with
// a write of `rank` member (of another box).
//
// TODO: define handling of assumed-rank arrays' boxes (they can always
// be represented with a conservative tag:
//   < `<descriptor member>`, `<descriptor member>`, 0 >
// so that they alias with any other box accesses).
//
// The same representation can be used for user-defined types, though,
// strict type aliasing cannot be applied for Fortran programs without
// additional guarantees from the user. Fortran's storage association
// constructs provide a way to alias data of different types, so using
// TBAA would be incorrect, e.g.:
//   subroutine test()
//     real :: a
//     integer :: b
//     equivalence (a, b)
//     a = 1.0
//     call test2(b)
//   end subroutine test
//
// The store operation for `a = 1.0` has the base/access type `f32`,
// while a load from `b` inside `test2` will have base/access type
// `i32`. Due to the storage association the store and the load alias,
// so using the access types to create TBAA access tags may result
// in an incorrect result if `test2` was inlined. Moreover, in the scope
// of `test2` Flang is not able to identify whether `b` is part
// of an equivalence.
//
// TBAA may still be applied for programs not using storage association
// for objects of different data types (e.g. under an opt-in compiler option).
//
// The initial implementation does not create detailed type descriptors
// for box types and always uses the conservative box access tag:
//   < `<descriptor member>`, `<descriptor member>`, 0 >
//
// Given the storage association, all non-box accesses are represented
// with the conservative data access tag:
//   < `<any data access>`, `<any data access>`, 0 >

// Builds TBAA access tags (mlir::LLVM::TBAATagAttr) and attaches them to
// memory accessing operations of the LLVM dialect.
//
// Additional tags are added in flang/Optimizer/Transforms/AddAliasTags.cpp
// (before CodeGen).
class TBAABuilder {
public:
  /// Creates a TBAA builder.
  /// If forceUnifiedTree is true, functions will not have different TBAA trees.
  /// NOTE(review): applyTBAA presumably initializes `enableTBAA`, and context
  /// is presumably the context in which the TBAA attributes are created -
  /// confirm against the constructor definition.
  TBAABuilder(mlir::MLIRContext *context, bool applyTBAA,
              bool forceUnifiedTree = false);
  // The builder is neither copy-constructible nor copy-assignable.
  TBAABuilder(TBAABuilder const &) = delete;
  TBAABuilder &operator=(TBAABuilder const &) = delete;

  // Attach the llvm.tbaa attribute to the given memory accessing operation
  // based on the provided base/access FIR types and the GEPOp.
  // This is a no-op when the builder is not active (see `enableTBAA`).
  void attachTBAATag(mlir::LLVM::AliasAnalysisOpInterface op,
                     mlir::Type baseFIRType, mlir::Type accessFIRType,
                     mlir::LLVM::GEPOp gep);

private:
  // Find or create TBAATagAttr attribute (TBAA access tag) with the specified
  // components (base type descriptor, access type descriptor and offset)
  // and return it.
  mlir::LLVM::TBAATagAttr
  getAccessTag(mlir::LLVM::TBAATypeDescriptorAttr baseTypeDesc,
               mlir::LLVM::TBAATypeDescriptorAttr accessTypeDesc,
               int64_t offset);

  // NOTE(review): for the three getAny*AccessTag methods below, `func`
  // presumably selects the TBAA tree of the function containing the access
  // (see forceUnifiedTree in the constructor) - confirm against the
  // definitions.

  // Returns TBAATagAttr representing access tag:
  //   < <descriptor member>, <descriptor member>, 0 >
  mlir::LLVM::TBAATagAttr getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func);
  // Returns TBAATagAttr representing access tag:
  //   < <any data access>, <any data access>, 0 >
  mlir::LLVM::TBAATagAttr getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func);
  // Returns TBAATagAttr representing access tag:
  //   < <any access>, <any access>, 0 >
  mlir::LLVM::TBAATagAttr getAnyAccessTag(mlir::LLVM::LLVMFuncOp func);

  // Returns TBAATagAttr representing access tag described by the base and
  // access FIR types and the LLVM::GEPOp representing the access in terms of
  // the FIR types converted to LLVM types. The base type must be derived from
  // fir::BaseBoxType.
  mlir::LLVM::TBAATagAttr getBoxAccessTag(mlir::Type baseFIRType,
                                          mlir::Type accessFIRType,
                                          mlir::LLVM::GEPOp gep,
                                          mlir::LLVM::LLVMFuncOp func);

  // Returns TBAATagAttr representing access tag described by the base and
  // access FIR types and the LLVM::GEPOp representing the access in terms of
  // the FIR types converted to LLVM types. The FIR types must describe the
  // "data" access, i.e. not an access of any box/descriptor member.
  mlir::LLVM::TBAATagAttr getDataAccessTag(mlir::Type baseFIRType,
                                           mlir::Type accessFIRType,
                                           mlir::LLVM::GEPOp gep,
                                           mlir::LLVM::LLVMFuncOp func);

  // Set to true, if TBAA builder is active, otherwise, all public
  // methods are no-ops.
  bool enableTBAA;

  // Number of attached TBAA tags (used for debugging).
  unsigned tagAttachmentCounter = 0;

  // Mapping from a FIR type to the corresponding TBAATypeDescriptorAttr. It
  // must be populated during the type conversion. Currently unused.
  llvm::DenseMap<mlir::Type, mlir::LLVM::TBAATypeDescriptorAttr> typeDescMap;

  // Each TBAA tag is a tuple of <base type, access type, offset>, where
  // the two types are keyed as TBAANodeAttr attributes.
  // This map holds a TBAATagAttr for each unique tuple.
  llvm::DenseMap<
      std::tuple<mlir::LLVM::TBAANodeAttr, mlir::LLVM::TBAANodeAttr, int64_t>,
      mlir::LLVM::TBAATagAttr>
      tagsMap;

  // The TBAA trees this builder works with.
  // NOTE(review): TBAAForrest is presumably declared in
  // flang/Optimizer/Analysis/TBAAForest.h (included above) and presumably
  // keeps one tree per function unless forceUnifiedTree was requested -
  // confirm against that header.
  TBAAForrest trees;
};

} // namespace fir

#endif // FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H