llvm/flang/lib/Lower/OpenACC.cpp

//===-- OpenACC.cpp -- OpenACC directive lowering -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
//
//===----------------------------------------------------------------------===//

#include "flang/Lower/OpenACC.h"
#include "DirectivesCommon.h"
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertType.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/Support/Utils.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/Complex.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/IntrinsicCall.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Parser/parse-tree-visitor.h"
#include "flang/Parser/parse-tree.h"
#include "flang/Semantics/expression.h"
#include "flang/Semantics/scope.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "llvm/Frontend/OpenACC/ACC.h.inc"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "flang-lower-openacc"

// Special value for * passed in device_type or gang clauses.
static constexpr std::int64_t starCst = -1;

static unsigned routineCounter = 0;
static constexpr llvm::StringRef accRoutinePrefix = "acc_routine_";
static constexpr llvm::StringRef accPrivateInitName = "acc.private.init";
static constexpr llvm::StringRef accReductionInitName = "acc.reduction.init";
static constexpr llvm::StringRef accFirDescriptorPostfix = "_desc";

static mlir::Location
genOperandLocation(Fortran::lower::AbstractConverter &converter,
                   const Fortran::parser::AccObject &accObject) {
  mlir::Location loc = converter.genUnknownLocation();
  Fortran::common::visit(
      Fortran::common::visitors{
          [&](const Fortran::parser::Designator &designator) {
            loc = converter.genLocation(designator.source);
          },
          [&](const Fortran::parser::Name &name) {
            loc = converter.genLocation(name.source);
          }},
      accObject.u);
  return loc;
}

static void addOperands(llvm::SmallVectorImpl<mlir::Value> &operands,
                        llvm::SmallVectorImpl<int32_t> &operandSegments,
                        llvm::ArrayRef<mlir::Value> clauseOperands) {
  operands.append(clauseOperands.begin(), clauseOperands.end());
  operandSegments.push_back(clauseOperands.size());
}

static void addOperand(llvm::SmallVectorImpl<mlir::Value> &operands,
                       llvm::SmallVectorImpl<int32_t> &operandSegments,
                       const mlir::Value &clauseOperand) {
  if (clauseOperand) {
    operands.push_back(clauseOperand);
    operandSegments.push_back(1);
  } else {
    operandSegments.push_back(0);
  }
}

template <typename Op>
static Op
createDataEntryOp(fir::FirOpBuilder &builder, mlir::Location loc,
                  mlir::Value baseAddr, std::stringstream &name,
                  mlir::SmallVector<mlir::Value> bounds, bool structured,
                  bool implicit, mlir::acc::DataClause dataClause,
                  mlir::Type retTy, llvm::ArrayRef<mlir::Value> async,
                  llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
                  llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
                  bool unwrapBoxAddr = false, mlir::Value isPresent = {}) {
  mlir::Value varPtrPtr;
  // The data clause may apply to either the box reference itself or the
  // pointer to the data it holds. So use `unwrapBoxAddr` to decide.
  // When we have a box value - assume it refers to the data inside box.
  if ((fir::isBoxAddress(baseAddr.getType()) && unwrapBoxAddr) ||
      fir::isa_box_type(baseAddr.getType())) {
    if (isPresent) {
      mlir::Type ifRetTy =
          mlir::cast<fir::BaseBoxType>(fir::unwrapRefType(baseAddr.getType()))
              .getEleTy();
      if (!fir::isa_ref_type(ifRetTy))
        ifRetTy = fir::ReferenceType::get(ifRetTy);
      baseAddr =
          builder
              .genIfOp(loc, {ifRetTy}, isPresent,
                       /*withElseRegion=*/true)
              .genThen([&]() {
                if (fir::isBoxAddress(baseAddr.getType()))
                  baseAddr = builder.create<fir::LoadOp>(loc, baseAddr);
                mlir::Value boxAddr =
                    builder.create<fir::BoxAddrOp>(loc, baseAddr);
                builder.create<fir::ResultOp>(loc, mlir::ValueRange{boxAddr});
              })
              .genElse([&] {
                mlir::Value absent =
                    builder.create<fir::AbsentOp>(loc, ifRetTy);
                builder.create<fir::ResultOp>(loc, mlir::ValueRange{absent});
              })
              .getResults()[0];
    } else {
      if (fir::isBoxAddress(baseAddr.getType()))
        baseAddr = builder.create<fir::LoadOp>(loc, baseAddr);
      baseAddr = builder.create<fir::BoxAddrOp>(loc, baseAddr);
    }
    retTy = baseAddr.getType();
  }

  llvm::SmallVector<mlir::Value, 8> operands;
  llvm::SmallVector<int32_t, 8> operandSegments;

  addOperand(operands, operandSegments, baseAddr);
  addOperand(operands, operandSegments, varPtrPtr);
  addOperands(operands, operandSegments, bounds);
  addOperands(operands, operandSegments, async);

  Op op = builder.create<Op>(loc, retTy, operands);
  op.setNameAttr(builder.getStringAttr(name.str()));
  op.setStructured(structured);
  op.setImplicit(implicit);
  op.setDataClause(dataClause);
  op->setAttr(Op::getOperandSegmentSizeAttr(),
              builder.getDenseI32ArrayAttr(operandSegments));
  if (!asyncDeviceTypes.empty())
    op.setAsyncOperandsDeviceTypeAttr(builder.getArrayAttr(asyncDeviceTypes));
  if (!asyncOnlyDeviceTypes.empty())
    op.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes));
  return op;
}

static void addDeclareAttr(fir::FirOpBuilder &builder, mlir::Operation *op,
                           mlir::acc::DataClause clause) {
  if (!op)
    return;
  op->setAttr(mlir::acc::getDeclareAttrName(),
              mlir::acc::DeclareAttr::get(builder.getContext(),
                                          mlir::acc::DataClauseAttr::get(
                                              builder.getContext(), clause)));
}

static mlir::func::FuncOp
createDeclareFunc(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder,
                  mlir::Location loc, llvm::StringRef funcName,
                  llvm::SmallVector<mlir::Type> argsTy = {},
                  llvm::SmallVector<mlir::Location> locs = {}) {
  auto funcTy = mlir::FunctionType::get(modBuilder.getContext(), argsTy, {});
  auto funcOp = modBuilder.create<mlir::func::FuncOp>(loc, funcName, funcTy);
  funcOp.setVisibility(mlir::SymbolTable::Visibility::Private);
  builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy,
                      locs);
  builder.setInsertionPointToEnd(&funcOp.getRegion().back());
  builder.create<mlir::func::ReturnOp>(loc);
  builder.setInsertionPointToStart(&funcOp.getRegion().back());
  return funcOp;
}

template <typename Op>
static Op
createSimpleOp(fir::FirOpBuilder &builder, mlir::Location loc,
               const llvm::SmallVectorImpl<mlir::Value> &operands,
               const llvm::SmallVectorImpl<int32_t> &operandSegments) {
  llvm::ArrayRef<mlir::Type> argTy;
  Op op = builder.create<Op>(loc, argTy, operands);
  op->setAttr(Op::getOperandSegmentSizeAttr(),
              builder.getDenseI32ArrayAttr(operandSegments));
  return op;
}

template <typename EntryOp>
static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
                                          fir::FirOpBuilder &builder,
                                          mlir::Location loc, mlir::Type descTy,
                                          llvm::StringRef funcNamePrefix,
                                          std::stringstream &asFortran,
                                          mlir::acc::DataClause clause) {
  auto crtInsPt = builder.saveInsertionPoint();
  std::stringstream registerFuncName;
  registerFuncName << funcNamePrefix.str()
                   << Fortran::lower::declarePostAllocSuffix.str();

  if (!mlir::isa<fir::ReferenceType>(descTy))
    descTy = fir::ReferenceType::get(descTy);
  auto registerFuncOp = createDeclareFunc(
      modBuilder, builder, loc, registerFuncName.str(), {descTy}, {loc});

  llvm::SmallVector<mlir::Value> bounds;
  std::stringstream asFortranDesc;
  asFortranDesc << asFortran.str() << accFirDescriptorPostfix.str();

  // Updating descriptor must occur before the mapping of the data so that
  // attached data pointer is not overwritten.
  mlir::acc::UpdateDeviceOp updateDeviceOp =
      createDataEntryOp<mlir::acc::UpdateDeviceOp>(
          builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
          /*structured=*/false, /*implicit=*/true,
          mlir::acc::DataClause::acc_update_device, descTy,
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
  llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
  createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);

  mlir::Value desc =
      builder.create<fir::LoadOp>(loc, registerFuncOp.getArgument(0));
  fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, desc);
  addDeclareAttr(builder, boxAddrOp.getOperation(), clause);
  EntryOp entryOp = createDataEntryOp<EntryOp>(
      builder, loc, boxAddrOp.getResult(), asFortran, bounds,
      /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType(),
      /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  builder.create<mlir::acc::DeclareEnterOp>(
      loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
      mlir::ValueRange(entryOp.getAccPtr()));

  modBuilder.setInsertionPointAfter(registerFuncOp);
  builder.restoreInsertionPoint(crtInsPt);
}

template <typename ExitOp>
static void createDeclareDeallocFuncWithArg(
    mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, mlir::Location loc,
    mlir::Type descTy, llvm::StringRef funcNamePrefix,
    std::stringstream &asFortran, mlir::acc::DataClause clause) {
  auto crtInsPt = builder.saveInsertionPoint();
  // Generate the pre dealloc function.
  std::stringstream preDeallocFuncName;
  preDeallocFuncName << funcNamePrefix.str()
                     << Fortran::lower::declarePreDeallocSuffix.str();
  if (!mlir::isa<fir::ReferenceType>(descTy))
    descTy = fir::ReferenceType::get(descTy);
  auto preDeallocOp = createDeclareFunc(
      modBuilder, builder, loc, preDeallocFuncName.str(), {descTy}, {loc});
  mlir::Value loadOp =
      builder.create<fir::LoadOp>(loc, preDeallocOp.getArgument(0));
  fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, loadOp);
  addDeclareAttr(builder, boxAddrOp.getOperation(), clause);

  llvm::SmallVector<mlir::Value> bounds;
  mlir::acc::GetDevicePtrOp entryOp =
      createDataEntryOp<mlir::acc::GetDevicePtrOp>(
          builder, loc, boxAddrOp.getResult(), asFortran, bounds,
          /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType(),
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  builder.create<mlir::acc::DeclareExitOp>(
      loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));

  if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
                std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
                           entryOp.getVarPtr(), entryOp.getBounds(),
                           entryOp.getAsyncOperands(),
                           entryOp.getAsyncOperandsDeviceTypeAttr(),
                           entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
                           /*structured=*/false, /*implicit=*/false,
                           builder.getStringAttr(*entryOp.getName()));
  else
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
                           entryOp.getBounds(), entryOp.getAsyncOperands(),
                           entryOp.getAsyncOperandsDeviceTypeAttr(),
                           entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
                           /*structured=*/false, /*implicit=*/false,
                           builder.getStringAttr(*entryOp.getName()));

  // Generate the post dealloc function.
  modBuilder.setInsertionPointAfter(preDeallocOp);
  std::stringstream postDeallocFuncName;
  postDeallocFuncName << funcNamePrefix.str()
                      << Fortran::lower::declarePostDeallocSuffix.str();
  auto postDeallocOp = createDeclareFunc(
      modBuilder, builder, loc, postDeallocFuncName.str(), {descTy}, {loc});
  loadOp = builder.create<fir::LoadOp>(loc, postDeallocOp.getArgument(0));
  asFortran << accFirDescriptorPostfix.str();
  mlir::acc::UpdateDeviceOp updateDeviceOp =
      createDataEntryOp<mlir::acc::UpdateDeviceOp>(
          builder, loc, loadOp, asFortran, bounds,
          /*structured=*/false, /*implicit=*/true,
          mlir::acc::DataClause::acc_update_device, loadOp.getType(),
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
  llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
  createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
  modBuilder.setInsertionPointAfter(postDeallocOp);
  builder.restoreInsertionPoint(crtInsPt);
}

Fortran::semantics::Symbol &
getSymbolFromAccObject(const Fortran::parser::AccObject &accObject) {
  if (const auto *designator =
          std::get_if<Fortran::parser::Designator>(&accObject.u)) {
    if (const auto *name =
            Fortran::semantics::getDesignatorNameIfDataRef(*designator))
      return *name->symbol;
    if (const auto *arrayElement =
            Fortran::parser::Unwrap<Fortran::parser::ArrayElement>(
                *designator)) {
      const Fortran::parser::Name &name =
          Fortran::parser::GetLastName(arrayElement->base);
      return *name.symbol;
    }
    if (const auto *component =
            Fortran::parser::Unwrap<Fortran::parser::StructureComponent>(
                *designator)) {
      return *component->component.symbol;
    }
  } else if (const auto *name =
                 std::get_if<Fortran::parser::Name>(&accObject.u)) {
    return *name->symbol;
  }
  llvm::report_fatal_error("Could not find symbol");
}

template <typename Op>
static void
genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
                         Fortran::lower::AbstractConverter &converter,
                         Fortran::semantics::SemanticsContext &semanticsContext,
                         Fortran::lower::StatementContext &stmtCtx,
                         llvm::SmallVectorImpl<mlir::Value> &dataOperands,
                         mlir::acc::DataClause dataClause, bool structured,
                         bool implicit, llvm::ArrayRef<mlir::Value> async,
                         llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
                         llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
                         bool setDeclareAttr = false) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
  for (const auto &accObject : objectList.v) {
    llvm::SmallVector<mlir::Value> bounds;
    std::stringstream asFortran;
    mlir::Location operandLocation = genOperandLocation(converter, accObject);
    Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
    Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
        [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
    Fortran::lower::AddrAndBoundsInfo info =
        Fortran::lower::gatherDataOperandAddrAndBounds<
            mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
            converter, builder, semanticsContext, stmtCtx, symbol, designator,
            operandLocation, asFortran, bounds,
            /*treatIndexAsSection=*/true);
    LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));

    // If the input value is optional and is not a descriptor, we use the
    // rawInput directly.
    mlir::Value baseAddr = ((fir::unwrapRefType(info.addr.getType()) !=
                             fir::unwrapRefType(info.rawInput.getType())) &&
                            info.isPresent)
                               ? info.rawInput
                               : info.addr;
    Op op = createDataEntryOp<Op>(
        builder, operandLocation, baseAddr, asFortran, bounds, structured,
        implicit, dataClause, baseAddr.getType(), async, asyncDeviceTypes,
        asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true, info.isPresent);
    dataOperands.push_back(op.getAccPtr());
  }
}

template <typename EntryOp, typename ExitOp>
static void genDeclareDataOperandOperations(
    const Fortran::parser::AccObjectList &objectList,
    Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &stmtCtx,
    llvm::SmallVectorImpl<mlir::Value> &dataOperands,
    mlir::acc::DataClause dataClause, bool structured, bool implicit) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
  for (const auto &accObject : objectList.v) {
    llvm::SmallVector<mlir::Value> bounds;
    std::stringstream asFortran;
    mlir::Location operandLocation = genOperandLocation(converter, accObject);
    Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
    Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
        [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
    Fortran::lower::AddrAndBoundsInfo info =
        Fortran::lower::gatherDataOperandAddrAndBounds<
            mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
            converter, builder, semanticsContext, stmtCtx, symbol, designator,
            operandLocation, asFortran, bounds);
    LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));
    EntryOp op = createDataEntryOp<EntryOp>(
        builder, operandLocation, info.addr, asFortran, bounds, structured,
        implicit, dataClause, info.addr.getType(),
        /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
    dataOperands.push_back(op.getAccPtr());
    addDeclareAttr(builder, op.getVarPtr().getDefiningOp(), dataClause);
    if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(info.addr.getType()))) {
      mlir::OpBuilder modBuilder(builder.getModule().getBodyRegion());
      modBuilder.setInsertionPointAfter(builder.getFunction());
      std::string prefix = converter.mangleName(symbol);
      createDeclareAllocFuncWithArg<EntryOp>(
          modBuilder, builder, operandLocation, info.addr.getType(), prefix,
          asFortran, dataClause);
      if constexpr (!std::is_same_v<EntryOp, ExitOp>)
        createDeclareDeallocFuncWithArg<ExitOp>(
            modBuilder, builder, operandLocation, info.addr.getType(), prefix,
            asFortran, dataClause);
    }
  }
}

template <typename EntryOp, typename ExitOp, typename Clause>
static void genDeclareDataOperandOperationsWithModifier(
    const Clause *x, Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &stmtCtx,
    Fortran::parser::AccDataModifier::Modifier mod,
    llvm::SmallVectorImpl<mlir::Value> &dataClauseOperands,
    const mlir::acc::DataClause clause,
    const mlir::acc::DataClause clauseWithModifier) {
  const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v;
  const auto &accObjectList =
      std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
  const auto &modifier =
      std::get<std::optional<Fortran::parser::AccDataModifier>>(
          listWithModifier.t);
  mlir::acc::DataClause dataClause =
      (modifier && (*modifier).v == mod) ? clauseWithModifier : clause;
  genDeclareDataOperandOperations<EntryOp, ExitOp>(
      accObjectList, converter, semanticsContext, stmtCtx, dataClauseOperands,
      dataClause,
      /*structured=*/true, /*implicit=*/false);
}

template <typename EntryOp, typename ExitOp>
static void genDataExitOperations(fir::FirOpBuilder &builder,
                                  llvm::SmallVector<mlir::Value> operands,
                                  bool structured) {
  for (mlir::Value operand : operands) {
    auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp());
    assert(entryOp && "data entry op expected");
    if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
                  std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
      builder.create<ExitOp>(
          entryOp.getLoc(), entryOp.getAccPtr(), entryOp.getVarPtr(),
          entryOp.getBounds(), entryOp.getAsyncOperands(),
          entryOp.getAsyncOperandsDeviceTypeAttr(), entryOp.getAsyncOnlyAttr(),
          entryOp.getDataClause(), structured, entryOp.getImplicit(),
          builder.getStringAttr(*entryOp.getName()));
    else
      builder.create<ExitOp>(
          entryOp.getLoc(), entryOp.getAccPtr(), entryOp.getBounds(),
          entryOp.getAsyncOperands(), entryOp.getAsyncOperandsDeviceTypeAttr(),
          entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), structured,
          entryOp.getImplicit(), builder.getStringAttr(*entryOp.getName()));
  }
}

fir::ShapeOp genShapeOp(mlir::OpBuilder &builder, fir::SequenceType seqTy,
                        mlir::Location loc) {
  llvm::SmallVector<mlir::Value> extents;
  mlir::Type idxTy = builder.getIndexType();
  for (auto extent : seqTy.getShape())
    extents.push_back(builder.create<mlir::arith::ConstantOp>(
        loc, idxTy, builder.getIntegerAttr(idxTy, extent)));
  return builder.create<fir::ShapeOp>(loc, extents);
}

template <typename RecipeOp>
static void genPrivateLikeInitRegion(mlir::OpBuilder &builder, RecipeOp recipe,
                                     mlir::Type ty, mlir::Location loc) {
  mlir::Value retVal = recipe.getInitRegion().front().getArgument(0);
  if (auto refTy = mlir::dyn_cast_or_null<fir::ReferenceType>(ty)) {
    if (fir::isa_trivial(refTy.getEleTy())) {
      auto alloca = builder.create<fir::AllocaOp>(loc, refTy.getEleTy());
      auto declareOp = builder.create<hlfir::DeclareOp>(
          loc, alloca, accPrivateInitName, /*shape=*/nullptr,
          llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
          fir::FortranVariableFlagsAttr{});
      retVal = declareOp.getBase();
    } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(
                   refTy.getEleTy())) {
      if (fir::isa_trivial(seqTy.getEleTy())) {
        mlir::Value shape;
        llvm::SmallVector<mlir::Value> extents;
        if (seqTy.hasDynamicExtents()) {
          // Extents are passed as block arguments. First argument is the
          // original value.
          for (unsigned i = 1; i < recipe.getInitRegion().getArguments().size();
               ++i)
            extents.push_back(recipe.getInitRegion().getArgument(i));
          shape = builder.create<fir::ShapeOp>(loc, extents);
        } else {
          shape = genShapeOp(builder, seqTy, loc);
        }
        auto alloca = builder.create<fir::AllocaOp>(
            loc, seqTy, /*typeparams=*/mlir::ValueRange{}, extents);
        auto declareOp = builder.create<hlfir::DeclareOp>(
            loc, alloca, accPrivateInitName, shape,
            llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
            fir::FortranVariableFlagsAttr{});
        retVal = declareOp.getBase();
      }
    }
  } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    mlir::Type innerTy = fir::extractSequenceType(boxTy);
    if (!innerTy)
      TODO(loc, "Unsupported boxed type in OpenACC privatization");
    fir::FirOpBuilder firBuilder{builder, recipe.getOperation()};
    hlfir::Entity source = hlfir::Entity{retVal};
    auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
    retVal = temp;
  }
  builder.create<mlir::acc::YieldOp>(loc, retVal);
}

mlir::acc::PrivateRecipeOp
Fortran::lower::createOrGetPrivateRecipe(mlir::OpBuilder &builder,
                                         llvm::StringRef recipeName,
                                         mlir::Location loc, mlir::Type ty) {
  mlir::ModuleOp mod =
      builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>();
  if (auto recipe = mod.lookupSymbol<mlir::acc::PrivateRecipeOp>(recipeName))
    return recipe;

  auto crtPos = builder.saveInsertionPoint();
  mlir::OpBuilder modBuilder(mod.getBodyRegion());
  auto recipe =
      modBuilder.create<mlir::acc::PrivateRecipeOp>(loc, recipeName, ty);
  llvm::SmallVector<mlir::Type> argsTy{ty};
  llvm::SmallVector<mlir::Location> argsLoc{loc};
  if (auto refTy = mlir::dyn_cast_or_null<fir::ReferenceType>(ty)) {
    if (auto seqTy =
            mlir::dyn_cast_or_null<fir::SequenceType>(refTy.getEleTy())) {
      if (seqTy.hasDynamicExtents()) {
        mlir::Type idxTy = builder.getIndexType();
        for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
          argsTy.push_back(idxTy);
          argsLoc.push_back(loc);
        }
      }
    }
  }
  builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(),
                      argsTy, argsLoc);
  builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
  genPrivateLikeInitRegion<mlir::acc::PrivateRecipeOp>(builder, recipe, ty,
                                                       loc);
  builder.restoreInsertionPoint(crtPos);
  return recipe;
}

/// Check if the DataBoundsOp is a constant bound (lb and ub are constants or
/// extent is a constant).
bool isConstantBound(mlir::acc::DataBoundsOp &op) {
  if (op.getLowerbound() && fir::getIntIfConstant(op.getLowerbound()) &&
      op.getUpperbound() && fir::getIntIfConstant(op.getUpperbound()))
    return true;
  if (op.getExtent() && fir::getIntIfConstant(op.getExtent()))
    return true;
  return false;
}

/// Return true iff all the bounds are expressed with constant values.
bool areAllBoundConstant(const llvm::SmallVector<mlir::Value> &bounds) {
  for (auto bound : bounds) {
    auto dataBound =
        mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
    assert(dataBound && "Must be DataBoundOp operation");
    if (!isConstantBound(dataBound))
      return false;
  }
  return true;
}

static llvm::SmallVector<mlir::Value>
genConstantBounds(fir::FirOpBuilder &builder, mlir::Location loc,
                  mlir::acc::DataBoundsOp &dataBound) {
  mlir::Type idxTy = builder.getIndexType();
  mlir::Value lb, ub, step;
  if (dataBound.getLowerbound() &&
      fir::getIntIfConstant(dataBound.getLowerbound()) &&
      dataBound.getUpperbound() &&
      fir::getIntIfConstant(dataBound.getUpperbound())) {
    lb = builder.createIntegerConstant(
        loc, idxTy, *fir::getIntIfConstant(dataBound.getLowerbound()));
    ub = builder.createIntegerConstant(
        loc, idxTy, *fir::getIntIfConstant(dataBound.getUpperbound()));
    step = builder.createIntegerConstant(loc, idxTy, 1);
  } else if (dataBound.getExtent()) {
    lb = builder.createIntegerConstant(loc, idxTy, 0);
    ub = builder.createIntegerConstant(
        loc, idxTy, *fir::getIntIfConstant(dataBound.getExtent()) - 1);
    step = builder.createIntegerConstant(loc, idxTy, 1);
  } else {
    llvm::report_fatal_error("Expect constant lb/ub or extent");
  }
  return {lb, ub, step};
}

static fir::ShapeOp genShapeFromBoundsOrArgs(
    mlir::Location loc, fir::FirOpBuilder &builder, fir::SequenceType seqTy,
    const llvm::SmallVector<mlir::Value> &bounds, mlir::ValueRange arguments) {
  llvm::SmallVector<mlir::Value> args;
  if (areAllBoundConstant(bounds)) {
    for (auto bound : llvm::reverse(bounds)) {
      auto dataBound =
          mlir::cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
      args.append(genConstantBounds(builder, loc, dataBound));
    }
  } else {
    assert(((arguments.size() - 2) / 3 == seqTy.getDimension()) &&
           "Expect 3 block arguments per dimension");
    for (auto arg : arguments.drop_front(2))
      args.push_back(arg);
  }

  assert(args.size() % 3 == 0 && "Triplets must be a multiple of 3");
  llvm::SmallVector<mlir::Value> extents;
  mlir::Type idxTy = builder.getIndexType();
  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
  mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
  for (unsigned i = 0; i < args.size(); i += 3) {
    mlir::Value s1 =
        builder.create<mlir::arith::SubIOp>(loc, args[i + 1], args[0]);
    mlir::Value s2 = builder.create<mlir::arith::AddIOp>(loc, s1, one);
    mlir::Value s3 = builder.create<mlir::arith::DivSIOp>(loc, s2, args[i + 2]);
    mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::sgt, s3, zero);
    mlir::Value ext = builder.create<mlir::arith::SelectOp>(loc, cmp, s3, zero);
    extents.push_back(ext);
  }
  return builder.create<fir::ShapeOp>(loc, extents);
}

static hlfir::DesignateOp::Subscripts
getSubscriptsFromArgs(mlir::ValueRange args) {
  hlfir::DesignateOp::Subscripts triplets;
  for (unsigned i = 2; i < args.size(); i += 3)
    triplets.emplace_back(
        hlfir::DesignateOp::Triplet{args[i], args[i + 1], args[i + 2]});
  return triplets;
}

static hlfir::Entity genDesignateWithTriplets(
    fir::FirOpBuilder &builder, mlir::Location loc, hlfir::Entity &entity,
    hlfir::DesignateOp::Subscripts &triplets, mlir::Value shape) {
  llvm::SmallVector<mlir::Value> lenParams;
  hlfir::genLengthParameters(loc, builder, entity, lenParams);
  auto designate = builder.create<hlfir::DesignateOp>(
      loc, entity.getBase().getType(), entity, /*component=*/"",
      /*componentShape=*/mlir::Value{}, triplets,
      /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, shape,
      lenParams);
  return hlfir::Entity{designate.getResult()};
}

mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe(
    mlir::OpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc,
    mlir::Type ty, llvm::SmallVector<mlir::Value> &bounds) {
  mlir::ModuleOp mod =
      builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>();
  if (auto recipe =
          mod.lookupSymbol<mlir::acc::FirstprivateRecipeOp>(recipeName))
    return recipe;

  auto crtPos = builder.saveInsertionPoint();
  mlir::OpBuilder modBuilder(mod.getBodyRegion());
  auto recipe =
      modBuilder.create<mlir::acc::FirstprivateRecipeOp>(loc, recipeName, ty);
  llvm::SmallVector<mlir::Type> initArgsTy{ty};
  llvm::SmallVector<mlir::Location> initArgsLoc{loc};
  auto refTy = fir::unwrapRefType(ty);
  if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(refTy)) {
    if (seqTy.hasDynamicExtents()) {
      mlir::Type idxTy = builder.getIndexType();
      for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
        initArgsTy.push_back(idxTy);
        initArgsLoc.push_back(loc);
      }
    }
  }
  builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(),
                      initArgsTy, initArgsLoc);
  builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
  genPrivateLikeInitRegion<mlir::acc::FirstprivateRecipeOp>(builder, recipe, ty,
                                                            loc);

  bool allConstantBound = areAllBoundConstant(bounds);
  llvm::SmallVector<mlir::Type> argsTy{ty, ty};
  llvm::SmallVector<mlir::Location> argsLoc{loc, loc};
  if (!allConstantBound) {
    for (mlir::Value bound : llvm::reverse(bounds)) {
      auto dataBound =
          mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
      argsTy.push_back(dataBound.getLowerbound().getType());
      argsLoc.push_back(dataBound.getLowerbound().getLoc());
      argsTy.push_back(dataBound.getUpperbound().getType());
      argsLoc.push_back(dataBound.getUpperbound().getLoc());
      argsTy.push_back(dataBound.getStartIdx().getType());
      argsLoc.push_back(dataBound.getStartIdx().getLoc());
    }
  }
  builder.createBlock(&recipe.getCopyRegion(), recipe.getCopyRegion().end(),
                      argsTy, argsLoc);

  builder.setInsertionPointToEnd(&recipe.getCopyRegion().back());
  ty = fir::unwrapRefType(ty);
  if (fir::isa_trivial(ty)) {
    mlir::Value initValue = builder.create<fir::LoadOp>(
        loc, recipe.getCopyRegion().front().getArgument(0));
    builder.create<fir::StoreOp>(loc, initValue,
                                 recipe.getCopyRegion().front().getArgument(1));
  } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(ty)) {
    fir::FirOpBuilder firBuilder{builder, recipe.getOperation()};
    auto shape = genShapeFromBoundsOrArgs(
        loc, firBuilder, seqTy, bounds, recipe.getCopyRegion().getArguments());

    auto leftDeclOp = builder.create<hlfir::DeclareOp>(
        loc, recipe.getCopyRegion().getArgument(0), llvm::StringRef{}, shape,
        llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
        fir::FortranVariableFlagsAttr{});
    auto rightDeclOp = builder.create<hlfir::DeclareOp>(
        loc, recipe.getCopyRegion().getArgument(1), llvm::StringRef{}, shape,
        llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
        fir::FortranVariableFlagsAttr{});

    hlfir::DesignateOp::Subscripts triplets =
        getSubscriptsFromArgs(recipe.getCopyRegion().getArguments());
    auto leftEntity = hlfir::Entity{leftDeclOp.getBase()};
    auto left =
        genDesignateWithTriplets(firBuilder, loc, leftEntity, triplets, shape);
    auto rightEntity = hlfir::Entity{rightDeclOp.getBase()};
    auto right =
        genDesignateWithTriplets(firBuilder, loc, rightEntity, triplets, shape);

    firBuilder.create<hlfir::AssignOp>(loc, left, right);

  } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    fir::FirOpBuilder firBuilder{builder, recipe.getOperation()};
    llvm::SmallVector<mlir::Value> tripletArgs;
    mlir::Type innerTy = fir::extractSequenceType(boxTy);
    fir::SequenceType seqTy =
        mlir::dyn_cast_or_null<fir::SequenceType>(innerTy);
    if (!seqTy)
      TODO(loc, "Unsupported boxed type in OpenACC firstprivate");

    auto shape = genShapeFromBoundsOrArgs(
        loc, firBuilder, seqTy, bounds, recipe.getCopyRegion().getArguments());
    hlfir::DesignateOp::Subscripts triplets =
        getSubscriptsFromArgs(recipe.getCopyRegion().getArguments());
    auto leftEntity = hlfir::Entity{recipe.getCopyRegion().getArgument(0)};
    auto left =
        genDesignateWithTriplets(firBuilder, loc, leftEntity, triplets, shape);
    auto rightEntity = hlfir::Entity{recipe.getCopyRegion().getArgument(1)};
    auto right =
        genDesignateWithTriplets(firBuilder, loc, rightEntity, triplets, shape);
    firBuilder.create<hlfir::AssignOp>(loc, left, right);
  }

  builder.create<mlir::acc::TerminatorOp>(loc);
  builder.restoreInsertionPoint(crtPos);
  return recipe;
}

/// Get a string representation of the bounds.
std::string getBoundsString(llvm::SmallVector<mlir::Value> &bounds) {
  std::stringstream boundStr;
  if (!bounds.empty())
    boundStr << "_section_";
  llvm::interleave(
      bounds,
      [&](mlir::Value bound) {
        auto boundsOp =
            mlir::cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
        if (boundsOp.getLowerbound() &&
            fir::getIntIfConstant(boundsOp.getLowerbound()) &&
            boundsOp.getUpperbound() &&
            fir::getIntIfConstant(boundsOp.getUpperbound())) {
          boundStr << "lb" << *fir::getIntIfConstant(boundsOp.getLowerbound())
                   << ".ub" << *fir::getIntIfConstant(boundsOp.getUpperbound());
        } else if (boundsOp.getExtent() &&
                   fir::getIntIfConstant(boundsOp.getExtent())) {
          boundStr << "ext" << *fir::getIntIfConstant(boundsOp.getExtent());
        } else {
          boundStr << "?";
        }
      },
      [&] { boundStr << "x"; });
  return boundStr.str();
}

/// Rebuild the array type from the acc.bounds operation with constant
/// lowerbound/upperbound or extent.
mlir::Type getTypeFromBounds(llvm::SmallVector<mlir::Value> &bounds,
                             mlir::Type ty) {
  auto seqTy =
      mlir::dyn_cast_or_null<fir::SequenceType>(fir::unwrapRefType(ty));
  if (!bounds.empty() && seqTy) {
    llvm::SmallVector<int64_t> shape;
    for (auto b : bounds) {
      auto boundsOp =
          mlir::dyn_cast<mlir::acc::DataBoundsOp>(b.getDefiningOp());
      if (boundsOp.getLowerbound() &&
          fir::getIntIfConstant(boundsOp.getLowerbound()) &&
          boundsOp.getUpperbound() &&
          fir::getIntIfConstant(boundsOp.getUpperbound())) {
        int64_t ext = *fir::getIntIfConstant(boundsOp.getUpperbound()) -
                      *fir::getIntIfConstant(boundsOp.getLowerbound()) + 1;
        shape.push_back(ext);
      } else if (boundsOp.getExtent() &&
                 fir::getIntIfConstant(boundsOp.getExtent())) {
        shape.push_back(*fir::getIntIfConstant(boundsOp.getExtent()));
      } else {
        return ty; // TODO: handle dynamic shaped array slice.
      }
    }
    if (shape.empty() || shape.size() != bounds.size())
      return ty;
    auto newSeqTy = fir::SequenceType::get(shape, seqTy.getEleTy());
    if (mlir::isa<fir::ReferenceType, fir::PointerType>(ty))
      return fir::ReferenceType::get(newSeqTy);
    return newSeqTy;
  }
  return ty;
}

template <typename RecipeOp>
static void
genPrivatizations(const Fortran::parser::AccObjectList &objectList,
                  Fortran::lower::AbstractConverter &converter,
                  Fortran::semantics::SemanticsContext &semanticsContext,
                  Fortran::lower::StatementContext &stmtCtx,
                  llvm::SmallVectorImpl<mlir::Value> &dataOperands,
                  llvm::SmallVector<mlir::Attribute> &privatizations,
                  llvm::ArrayRef<mlir::Value> async,
                  llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
                  llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
  for (const auto &accObject : objectList.v) {
    llvm::SmallVector<mlir::Value> bounds;
    std::stringstream asFortran;
    mlir::Location operandLocation = genOperandLocation(converter, accObject);
    Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
    Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
        [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
    Fortran::lower::AddrAndBoundsInfo info =
        Fortran::lower::gatherDataOperandAddrAndBounds<
            mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
            converter, builder, semanticsContext, stmtCtx, symbol, designator,
            operandLocation, asFortran, bounds);
    LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));

    RecipeOp recipe;
    mlir::Type retTy = getTypeFromBounds(bounds, info.addr.getType());
    if constexpr (std::is_same_v<RecipeOp, mlir::acc::PrivateRecipeOp>) {
      std::string recipeName =
          fir::getTypeAsString(retTy, converter.getKindMap(),
                               Fortran::lower::privatizationRecipePrefix);
      recipe = Fortran::lower::createOrGetPrivateRecipe(builder, recipeName,
                                                        operandLocation, retTy);
      auto op = createDataEntryOp<mlir::acc::PrivateOp>(
          builder, operandLocation, info.addr, asFortran, bounds, true,
          /*implicit=*/false, mlir::acc::DataClause::acc_private, retTy, async,
          asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true);
      dataOperands.push_back(op.getAccPtr());
    } else {
      std::string suffix =
          areAllBoundConstant(bounds) ? getBoundsString(bounds) : "";
      std::string recipeName = fir::getTypeAsString(
          retTy, converter.getKindMap(), "firstprivatization" + suffix);
      recipe = Fortran::lower::createOrGetFirstprivateRecipe(
          builder, recipeName, operandLocation, retTy, bounds);
      auto op = createDataEntryOp<mlir::acc::FirstprivateOp>(
          builder, operandLocation, info.addr, asFortran, bounds, true,
          /*implicit=*/false, mlir::acc::DataClause::acc_firstprivate, retTy,
          async, asyncDeviceTypes, asyncOnlyDeviceTypes,
          /*unwrapBoxAddr=*/true);
      dataOperands.push_back(op.getAccPtr());
    }
    privatizations.push_back(mlir::SymbolRefAttr::get(
        builder.getContext(), recipe.getSymName().str()));
  }
}

/// Return the corresponding enum value for the mlir::acc::ReductionOperator
/// from the parser representation.
static mlir::acc::ReductionOperator
getReductionOperator(const Fortran::parser::ReductionOperator &op) {
  switch (op.v) {
  case Fortran::parser::ReductionOperator::Operator::Plus:
    return mlir::acc::ReductionOperator::AccAdd;
  case Fortran::parser::ReductionOperator::Operator::Multiply:
    return mlir::acc::ReductionOperator::AccMul;
  case Fortran::parser::ReductionOperator::Operator::Max:
    return mlir::acc::ReductionOperator::AccMax;
  case Fortran::parser::ReductionOperator::Operator::Min:
    return mlir::acc::ReductionOperator::AccMin;
  case Fortran::parser::ReductionOperator::Operator::Iand:
    return mlir::acc::ReductionOperator::AccIand;
  case Fortran::parser::ReductionOperator::Operator::Ior:
    return mlir::acc::ReductionOperator::AccIor;
  case Fortran::parser::ReductionOperator::Operator::Ieor:
    return mlir::acc::ReductionOperator::AccXor;
  case Fortran::parser::ReductionOperator::Operator::And:
    return mlir::acc::ReductionOperator::AccLand;
  case Fortran::parser::ReductionOperator::Operator::Or:
    return mlir::acc::ReductionOperator::AccLor;
  case Fortran::parser::ReductionOperator::Operator::Eqv:
    return mlir::acc::ReductionOperator::AccEqv;
  case Fortran::parser::ReductionOperator::Operator::Neqv:
    return mlir::acc::ReductionOperator::AccNeqv;
  }
  llvm_unreachable("unexpected reduction operator");
}

/// Get the initial value for reduction operator.
template <typename R>
static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
  if (op == mlir::acc::ReductionOperator::AccMin) {
    // min init value -> largest
    if constexpr (std::is_same_v<R, llvm::APInt>) {
      assert(ty.isIntOrIndex() && "expect integer or index type");
      return llvm::APInt::getSignedMaxValue(ty.getIntOrFloatBitWidth());
    }
    if constexpr (std::is_same_v<R, llvm::APFloat>) {
      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
      assert(floatTy && "expect float type");
      return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
                                       /*negative=*/false);
    }
  } else if (op == mlir::acc::ReductionOperator::AccMax) {
    // max init value -> smallest
    if constexpr (std::is_same_v<R, llvm::APInt>) {
      assert(ty.isIntOrIndex() && "expect integer or index type");
      return llvm::APInt::getSignedMinValue(ty.getIntOrFloatBitWidth());
    }
    if constexpr (std::is_same_v<R, llvm::APFloat>) {
      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
      assert(floatTy && "expect float type");
      return llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
                                        /*negative=*/true);
    }
  } else if (op == mlir::acc::ReductionOperator::AccIand) {
    if constexpr (std::is_same_v<R, llvm::APInt>) {
      assert(ty.isIntOrIndex() && "expect integer type");
      unsigned bits = ty.getIntOrFloatBitWidth();
      return llvm::APInt::getAllOnes(bits);
    }
  } else {
    // +, ior, ieor init value -> 0
    // * init value -> 1
    int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0;
    if constexpr (std::is_same_v<R, llvm::APInt>) {
      assert(ty.isIntOrIndex() && "expect integer or index type");
      return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true);
    }

    if constexpr (std::is_same_v<R, llvm::APFloat>) {
      assert(mlir::isa<mlir::FloatType>(ty) && "expect float type");
      auto floatTy = mlir::dyn_cast<mlir::FloatType>(ty);
      return llvm::APFloat(floatTy.getFloatSemantics(), value);
    }

    if constexpr (std::is_same_v<R, int64_t>)
      return value;
  }
  llvm_unreachable("OpenACC reduction unsupported type");
}

/// Return a constant with the initial value for the reduction operator and
/// type combination.
static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder,
                                         mlir::Location loc, mlir::Type ty,
                                         mlir::acc::ReductionOperator op) {
  if (op == mlir::acc::ReductionOperator::AccLand ||
      op == mlir::acc::ReductionOperator::AccLor ||
      op == mlir::acc::ReductionOperator::AccEqv ||
      op == mlir::acc::ReductionOperator::AccNeqv) {
    assert(mlir::isa<fir::LogicalType>(ty) && "expect fir.logical type");
    bool value = true; // .true. for .and. and .eqv.
    if (op == mlir::acc::ReductionOperator::AccLor ||
        op == mlir::acc::ReductionOperator::AccNeqv)
      value = false; // .false. for .or. and .neqv.
    return builder.createBool(loc, value);
  }
  if (ty.isIntOrIndex())
    return builder.create<mlir::arith::ConstantOp>(
        loc, ty,
        builder.getIntegerAttr(ty, getReductionInitValue<llvm::APInt>(op, ty)));
  if (op == mlir::acc::ReductionOperator::AccMin ||
      op == mlir::acc::ReductionOperator::AccMax) {
    if (mlir::isa<mlir::ComplexType>(ty))
      llvm::report_fatal_error(
          "min/max reduction not supported for complex type");
    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
      return builder.create<mlir::arith::ConstantOp>(
          loc, ty,
          builder.getFloatAttr(ty,
                               getReductionInitValue<llvm::APFloat>(op, ty)));
  } else if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty)) {
    return builder.create<mlir::arith::ConstantOp>(
        loc, ty,
        builder.getFloatAttr(ty, getReductionInitValue<int64_t>(op, ty)));
  } else if (auto cmplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(ty)) {
    mlir::Type floatTy = cmplxTy.getElementType();
    mlir::Value realInit = builder.createRealConstant(
        loc, floatTy, getReductionInitValue<int64_t>(op, cmplxTy));
    mlir::Value imagInit = builder.createRealConstant(loc, floatTy, 0.0);
    return fir::factory::Complex{builder, loc}.createComplex(cmplxTy, realInit,
                                                             imagInit);
  }

  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty))
    return getReductionInitValue(builder, loc, seqTy.getEleTy(), op);

  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty))
    return getReductionInitValue(builder, loc, boxTy.getEleTy(), op);

  if (auto heapTy = mlir::dyn_cast<fir::HeapType>(ty))
    return getReductionInitValue(builder, loc, heapTy.getEleTy(), op);

  if (auto ptrTy = mlir::dyn_cast<fir::PointerType>(ty))
    return getReductionInitValue(builder, loc, ptrTy.getEleTy(), op);

  llvm::report_fatal_error("Unsupported OpenACC reduction type");
}

static mlir::Value genReductionInitRegion(fir::FirOpBuilder &builder,
                                          mlir::Location loc, mlir::Type ty,
                                          mlir::acc::ReductionOperator op) {
  ty = fir::unwrapRefType(ty);
  mlir::Value initValue = getReductionInitValue(builder, loc, ty, op);
  if (fir::isa_trivial(ty)) {
    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
    auto declareOp = builder.create<hlfir::DeclareOp>(
        loc, alloca, accReductionInitName, /*shape=*/nullptr,
        llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
        fir::FortranVariableFlagsAttr{});
    builder.create<fir::StoreOp>(loc, builder.createConvert(loc, ty, initValue),
                                 declareOp.getBase());
    return declareOp.getBase();
  } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(ty)) {
    if (fir::isa_trivial(seqTy.getEleTy())) {
      mlir::Value shape;
      auto extents = builder.getBlock()->getArguments().drop_front(1);
      if (seqTy.hasDynamicExtents())
        shape = builder.create<fir::ShapeOp>(loc, extents);
      else
        shape = genShapeOp(builder, seqTy, loc);
      mlir::Value alloca = builder.create<fir::AllocaOp>(
          loc, seqTy, /*typeparams=*/mlir::ValueRange{}, extents);
      auto declareOp = builder.create<hlfir::DeclareOp>(
          loc, alloca, accReductionInitName, shape,
          llvm::ArrayRef<mlir::Value>{}, /*dummy_scope=*/nullptr,
          fir::FortranVariableFlagsAttr{});
      mlir::Type idxTy = builder.getIndexType();
      mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
      llvm::SmallVector<fir::DoLoopOp> loops;
      llvm::SmallVector<mlir::Value> ivs;

      if (seqTy.hasDynamicExtents()) {
        builder.create<hlfir::AssignOp>(loc, initValue, declareOp.getBase());
        return declareOp.getBase();
      }
      for (auto ext : llvm::reverse(seqTy.getShape())) {
        auto lb = builder.createIntegerConstant(loc, idxTy, 0);
        auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1);
        auto step = builder.createIntegerConstant(loc, idxTy, 1);
        auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
                                                  /*unordered=*/false);
        builder.setInsertionPointToStart(loop.getBody());
        loops.push_back(loop);
        ivs.push_back(loop.getInductionVar());
      }
      auto coord = builder.create<fir::CoordinateOp>(loc, refTy,
                                                     declareOp.getBase(), ivs);
      builder.create<fir::StoreOp>(loc, initValue, coord);
      builder.setInsertionPointAfter(loops[0]);
      return declareOp.getBase();
    }
  } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    mlir::Type innerTy = fir::extractSequenceType(boxTy);
    if (!mlir::isa<fir::SequenceType>(innerTy))
      TODO(loc, "Unsupported boxed type for reduction");
    // Create the private copy from the initial fir.box.
    hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)};
    auto [temp, cleanup] = hlfir::createTempFromMold(loc, builder, source);
    builder.create<hlfir::AssignOp>(loc, initValue, temp);
    return temp;
  }
  llvm::report_fatal_error("Unsupported OpenACC reduction type");
}

template <typename Op>
static mlir::Value genLogicalCombiner(fir::FirOpBuilder &builder,
                                      mlir::Location loc, mlir::Value value1,
                                      mlir::Value value2) {
  mlir::Type i1 = builder.getI1Type();
  mlir::Value v1 = builder.create<fir::ConvertOp>(loc, i1, value1);
  mlir::Value v2 = builder.create<fir::ConvertOp>(loc, i1, value2);
  mlir::Value combined = builder.create<Op>(loc, v1, v2);
  return builder.create<fir::ConvertOp>(loc, value1.getType(), combined);
}

static mlir::Value genComparisonCombiner(fir::FirOpBuilder &builder,
                                         mlir::Location loc,
                                         mlir::arith::CmpIPredicate pred,
                                         mlir::Value value1,
                                         mlir::Value value2) {
  mlir::Type i1 = builder.getI1Type();
  mlir::Value v1 = builder.create<fir::ConvertOp>(loc, i1, value1);
  mlir::Value v2 = builder.create<fir::ConvertOp>(loc, i1, value2);
  mlir::Value add = builder.create<mlir::arith::CmpIOp>(loc, pred, v1, v2);
  return builder.create<fir::ConvertOp>(loc, value1.getType(), add);
}

static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder,
                                     mlir::Location loc,
                                     mlir::acc::ReductionOperator op,
                                     mlir::Type ty, mlir::Value value1,
                                     mlir::Value value2) {
  value1 = builder.loadIfRef(loc, value1);
  value2 = builder.loadIfRef(loc, value2);
  if (op == mlir::acc::ReductionOperator::AccAdd) {
    if (ty.isIntOrIndex())
      return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
    if (mlir::isa<mlir::FloatType>(ty))
      return builder.create<mlir::arith::AddFOp>(loc, value1, value2);
    if (auto cmplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(ty))
      return builder.create<fir::AddcOp>(loc, value1, value2);
    TODO(loc, "reduction add type");
  }

  if (op == mlir::acc::ReductionOperator::AccMul) {
    if (ty.isIntOrIndex())
      return builder.create<mlir::arith::MulIOp>(loc, value1, value2);
    if (mlir::isa<mlir::FloatType>(ty))
      return builder.create<mlir::arith::MulFOp>(loc, value1, value2);
    if (mlir::isa<mlir::ComplexType>(ty))
      return builder.create<fir::MulcOp>(loc, value1, value2);
    TODO(loc, "reduction mul type");
  }

  if (op == mlir::acc::ReductionOperator::AccMin)
    return fir::genMin(builder, loc, {value1, value2});

  if (op == mlir::acc::ReductionOperator::AccMax)
    return fir::genMax(builder, loc, {value1, value2});

  if (op == mlir::acc::ReductionOperator::AccIand)
    return builder.create<mlir::arith::AndIOp>(loc, value1, value2);

  if (op == mlir::acc::ReductionOperator::AccIor)
    return builder.create<mlir::arith::OrIOp>(loc, value1, value2);

  if (op == mlir::acc::ReductionOperator::AccXor)
    return builder.create<mlir::arith::XOrIOp>(loc, value1, value2);

  if (op == mlir::acc::ReductionOperator::AccLand)
    return genLogicalCombiner<mlir::arith::AndIOp>(builder, loc, value1,
                                                   value2);

  if (op == mlir::acc::ReductionOperator::AccLor)
    return genLogicalCombiner<mlir::arith::OrIOp>(builder, loc, value1, value2);

  if (op == mlir::acc::ReductionOperator::AccEqv)
    return genComparisonCombiner(builder, loc, mlir::arith::CmpIPredicate::eq,
                                 value1, value2);

  if (op == mlir::acc::ReductionOperator::AccNeqv)
    return genComparisonCombiner(builder, loc, mlir::arith::CmpIPredicate::ne,
                                 value1, value2);

  TODO(loc, "reduction operator");
}

static hlfir::DesignateOp::Subscripts
getTripletsFromArgs(mlir::acc::ReductionRecipeOp recipe) {
  hlfir::DesignateOp::Subscripts triplets;
  for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
       i += 3)
    triplets.emplace_back(hlfir::DesignateOp::Triplet{
        recipe.getCombinerRegion().getArgument(i),
        recipe.getCombinerRegion().getArgument(i + 1),
        recipe.getCombinerRegion().getArgument(i + 2)});
  return triplets;
}

static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
                        mlir::acc::ReductionOperator op, mlir::Type ty,
                        mlir::Value value1, mlir::Value value2,
                        mlir::acc::ReductionRecipeOp &recipe,
                        llvm::SmallVector<mlir::Value> &bounds,
                        bool allConstantBound) {
  ty = fir::unwrapRefType(ty);

  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) {
    mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
    llvm::SmallVector<fir::DoLoopOp> loops;
    llvm::SmallVector<mlir::Value> ivs;
    if (seqTy.hasDynamicExtents()) {
      auto shape =
          genShapeFromBoundsOrArgs(loc, builder, seqTy, bounds,
                                   recipe.getCombinerRegion().getArguments());
      auto v1DeclareOp = builder.create<hlfir::DeclareOp>(
          loc, value1, llvm::StringRef{}, shape, llvm::ArrayRef<mlir::Value>{},
          /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{});
      auto v2DeclareOp = builder.create<hlfir::DeclareOp>(
          loc, value2, llvm::StringRef{}, shape, llvm::ArrayRef<mlir::Value>{},
          /*dummy_scope=*/nullptr, fir::FortranVariableFlagsAttr{});
      hlfir::DesignateOp::Subscripts triplets = getTripletsFromArgs(recipe);

      llvm::SmallVector<mlir::Value> lenParamsLeft;
      auto leftEntity = hlfir::Entity{v1DeclareOp.getBase()};
      hlfir::genLengthParameters(loc, builder, leftEntity, lenParamsLeft);
      auto leftDesignate = builder.create<hlfir::DesignateOp>(
          loc, v1DeclareOp.getBase().getType(), v1DeclareOp.getBase(),
          /*component=*/"",
          /*componentShape=*/mlir::Value{}, triplets,
          /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt,
          shape, lenParamsLeft);
      auto left = hlfir::Entity{leftDesignate.getResult()};

      llvm::SmallVector<mlir::Value> lenParamsRight;
      auto rightEntity = hlfir::Entity{v2DeclareOp.getBase()};
      hlfir::genLengthParameters(loc, builder, rightEntity, lenParamsLeft);
      auto rightDesignate = builder.create<hlfir::DesignateOp>(
          loc, v2DeclareOp.getBase().getType(), v2DeclareOp.getBase(),
          /*component=*/"",
          /*componentShape=*/mlir::Value{}, triplets,
          /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt,
          shape, lenParamsRight);
      auto right = hlfir::Entity{rightDesignate.getResult()};

      llvm::SmallVector<mlir::Value, 1> typeParams;
      auto genKernel = [&builder, &loc, op, seqTy, &left, &right](
                           mlir::Location l, fir::FirOpBuilder &b,
                           mlir::ValueRange oneBasedIndices) -> hlfir::Entity {
        auto leftElement = hlfir::getElementAt(l, b, left, oneBasedIndices);
        auto rightElement = hlfir::getElementAt(l, b, right, oneBasedIndices);
        auto leftVal = hlfir::loadTrivialScalar(l, b, leftElement);
        auto rightVal = hlfir::loadTrivialScalar(l, b, rightElement);
        return hlfir::Entity{genScalarCombiner(
            builder, loc, op, seqTy.getEleTy(), leftVal, rightVal)};
      };
      mlir::Value elemental = hlfir::genElementalOp(
          loc, builder, seqTy.getEleTy(), shape, typeParams, genKernel,
          /*isUnordered=*/true);
      builder.create<hlfir::AssignOp>(loc, elemental, v1DeclareOp.getBase());
      return;
    }
    if (allConstantBound) {
      // Use the constant bound directly in the combiner region so they do not
      // need to be passed as block argument.
      for (auto bound : llvm::reverse(bounds)) {
        auto dataBound =
            mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
        llvm::SmallVector<mlir::Value> values =
            genConstantBounds(builder, loc, dataBound);
        auto loop =
            builder.create<fir::DoLoopOp>(loc, values[0], values[1], values[2],
                                          /*unordered=*/false);
        builder.setInsertionPointToStart(loop.getBody());
        loops.push_back(loop);
        ivs.push_back(loop.getInductionVar());
      }
    } else {
      // Lowerbound, upperbound and step are passed as block arguments.
      [[maybe_unused]] unsigned nbRangeArgs =
          recipe.getCombinerRegion().getArguments().size() - 2;
      assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
             "Expect 3 block arguments per dimension");
      for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
           i += 3) {
        mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
        mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
        mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
        auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
                                                  /*unordered=*/false);
        builder.setInsertionPointToStart(loop.getBody());
        loops.push_back(loop);
        ivs.push_back(loop.getInductionVar());
      }
    }
    auto addr1 = builder.create<fir::CoordinateOp>(loc, refTy, value1, ivs);
    auto addr2 = builder.create<fir::CoordinateOp>(loc, refTy, value2, ivs);
    auto load1 = builder.create<fir::LoadOp>(loc, addr1);
    auto load2 = builder.create<fir::LoadOp>(loc, addr2);
    mlir::Value res =
        genScalarCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2);
    builder.create<fir::StoreOp>(loc, res, addr1);
    builder.setInsertionPointAfter(loops[0]);
  } else if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) {
    mlir::Type innerTy = fir::extractSequenceType(boxTy);
    fir::SequenceType seqTy =
        mlir::dyn_cast_or_null<fir::SequenceType>(innerTy);
    if (!seqTy)
      TODO(loc, "Unsupported boxed type in OpenACC reduction");

    auto shape = genShapeFromBoundsOrArgs(
        loc, builder, seqTy, bounds, recipe.getCombinerRegion().getArguments());
    hlfir::DesignateOp::Subscripts triplets =
        getSubscriptsFromArgs(recipe.getCombinerRegion().getArguments());
    auto leftEntity = hlfir::Entity{value1};
    auto left =
        genDesignateWithTriplets(builder, loc, leftEntity, triplets, shape);
    auto rightEntity = hlfir::Entity{value2};
    auto right =
        genDesignateWithTriplets(builder, loc, rightEntity, triplets, shape);

    llvm::SmallVector<mlir::Value, 1> typeParams;
    auto genKernel = [&builder, &loc, op, seqTy, &left, &right](
                         mlir::Location l, fir::FirOpBuilder &b,
                         mlir::ValueRange oneBasedIndices) -> hlfir::Entity {
      auto leftElement = hlfir::getElementAt(l, b, left, oneBasedIndices);
      auto rightElement = hlfir::getElementAt(l, b, right, oneBasedIndices);
      auto leftVal = hlfir::loadTrivialScalar(l, b, leftElement);
      auto rightVal = hlfir::loadTrivialScalar(l, b, rightElement);
      return hlfir::Entity{genScalarCombiner(builder, loc, op, seqTy.getEleTy(),
                                             leftVal, rightVal)};
    };
    mlir::Value elemental = hlfir::genElementalOp(
        loc, builder, seqTy.getEleTy(), shape, typeParams, genKernel,
        /*isUnordered=*/true);
    builder.create<hlfir::AssignOp>(loc, elemental, value1);
  } else {
    mlir::Value res = genScalarCombiner(builder, loc, op, ty, value1, value2);
    builder.create<fir::StoreOp>(loc, res, value1);
  }
}

mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe(
    fir::FirOpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc,
    mlir::Type ty, mlir::acc::ReductionOperator op,
    llvm::SmallVector<mlir::Value> &bounds) {
  mlir::ModuleOp mod =
      builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>();
  if (auto recipe = mod.lookupSymbol<mlir::acc::ReductionRecipeOp>(recipeName))
    return recipe;

  auto crtPos = builder.saveInsertionPoint();
  mlir::OpBuilder modBuilder(mod.getBodyRegion());
  auto recipe =
      modBuilder.create<mlir::acc::ReductionRecipeOp>(loc, recipeName, ty, op);
  llvm::SmallVector<mlir::Type> initArgsTy{ty};
  llvm::SmallVector<mlir::Location> initArgsLoc{loc};
  mlir::Type refTy = fir::unwrapRefType(ty);
  if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(refTy)) {
    if (seqTy.hasDynamicExtents()) {
      mlir::Type idxTy = builder.getIndexType();
      for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
        initArgsTy.push_back(idxTy);
        initArgsLoc.push_back(loc);
      }
    }
  }
  builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(),
                      initArgsTy, initArgsLoc);
  builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
  mlir::Value initValue = genReductionInitRegion(builder, loc, ty, op);
  builder.create<mlir::acc::YieldOp>(loc, initValue);

  // The two first block arguments are the two values to be combined.
  // The next arguments are the iteration ranges (lb, ub, step) to be used
  // for the combiner if needed.
  llvm::SmallVector<mlir::Type> argsTy{ty, ty};
  llvm::SmallVector<mlir::Location> argsLoc{loc, loc};
  bool allConstantBound = areAllBoundConstant(bounds);
  if (!allConstantBound) {
    for (mlir::Value bound : llvm::reverse(bounds)) {
      auto dataBound =
          mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp());
      argsTy.push_back(dataBound.getLowerbound().getType());
      argsLoc.push_back(dataBound.getLowerbound().getLoc());
      argsTy.push_back(dataBound.getUpperbound().getType());
      argsLoc.push_back(dataBound.getUpperbound().getLoc());
      argsTy.push_back(dataBound.getStartIdx().getType());
      argsLoc.push_back(dataBound.getStartIdx().getLoc());
    }
  }
  builder.createBlock(&recipe.getCombinerRegion(),
                      recipe.getCombinerRegion().end(), argsTy, argsLoc);
  builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
  mlir::Value v1 = recipe.getCombinerRegion().front().getArgument(0);
  mlir::Value v2 = recipe.getCombinerRegion().front().getArgument(1);
  genCombiner(builder, loc, op, ty, v1, v2, recipe, bounds, allConstantBound);
  builder.create<mlir::acc::YieldOp>(loc, v1);
  builder.restoreInsertionPoint(crtPos);
  return recipe;
}

static bool isSupportedReductionType(mlir::Type ty) {
  ty = fir::unwrapRefType(ty);
  if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty))
    return isSupportedReductionType(boxTy.getEleTy());
  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty))
    return isSupportedReductionType(seqTy.getEleTy());
  if (auto heapTy = mlir::dyn_cast<fir::HeapType>(ty))
    return isSupportedReductionType(heapTy.getEleTy());
  if (auto ptrTy = mlir::dyn_cast<fir::PointerType>(ty))
    return isSupportedReductionType(ptrTy.getEleTy());
  return fir::isa_trivial(ty);
}

static void
genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
              Fortran::lower::AbstractConverter &converter,
              Fortran::semantics::SemanticsContext &semanticsContext,
              Fortran::lower::StatementContext &stmtCtx,
              llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
              llvm::SmallVector<mlir::Attribute> &reductionRecipes,
              llvm::ArrayRef<mlir::Value> async,
              llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
              llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
  const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
  mlir::acc::ReductionOperator mlirOp = getReductionOperator(op);
  Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
  for (const auto &accObject : objects.v) {
    llvm::SmallVector<mlir::Value> bounds;
    std::stringstream asFortran;
    mlir::Location operandLocation = genOperandLocation(converter, accObject);
    Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
    Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
        [&](auto &&s) { return ea.Analyze(s); }, accObject.u);
    Fortran::lower::AddrAndBoundsInfo info =
        Fortran::lower::gatherDataOperandAddrAndBounds<
            mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
            converter, builder, semanticsContext, stmtCtx, symbol, designator,
            operandLocation, asFortran, bounds);
    LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));

    mlir::Type reductionTy = fir::unwrapRefType(info.addr.getType());
    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(reductionTy))
      reductionTy = seqTy.getEleTy();

    if (!isSupportedReductionType(reductionTy))
      TODO(operandLocation, "reduction with unsupported type");

    auto op = createDataEntryOp<mlir::acc::ReductionOp>(
        builder, operandLocation, info.addr, asFortran, bounds,
        /*structured=*/true, /*implicit=*/false,
        mlir::acc::DataClause::acc_reduction, info.addr.getType(), async,
        asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true);
    mlir::Type ty = op.getAccPtr().getType();
    if (!areAllBoundConstant(bounds) ||
        fir::isAssumedShape(info.addr.getType()) ||
        fir::isAllocatableOrPointerArray(info.addr.getType()))
      ty = info.addr.getType();
    std::string suffix =
        areAllBoundConstant(bounds) ? getBoundsString(bounds) : "";
    std::string recipeName = fir::getTypeAsString(
        ty, converter.getKindMap(),
        ("reduction_" + stringifyReductionOperator(mlirOp)).str() + suffix);

    mlir::acc::ReductionRecipeOp recipe =
        Fortran::lower::createOrGetReductionRecipe(
            builder, recipeName, operandLocation, ty, mlirOp, bounds);
    reductionRecipes.push_back(mlir::SymbolRefAttr::get(
        builder.getContext(), recipe.getSymName().str()));
    reductionOperands.push_back(op.getAccPtr());
  }
}

template <typename Op, typename Terminator>
static Op
createRegionOp(fir::FirOpBuilder &builder, mlir::Location loc,
               mlir::Location returnLoc, Fortran::lower::pft::Evaluation &eval,
               const llvm::SmallVectorImpl<mlir::Value> &operands,
               const llvm::SmallVectorImpl<int32_t> &operandSegments,
               bool outerCombined = false,
               llvm::SmallVector<mlir::Type> retTy = {},
               mlir::Value yieldValue = {}, mlir::TypeRange argsTy = {},
               llvm::SmallVector<mlir::Location> locs = {}) {
  Op op = builder.create<Op>(loc, retTy, operands);
  builder.createBlock(&op.getRegion(), op.getRegion().end(), argsTy, locs);
  mlir::Block &block = op.getRegion().back();
  builder.setInsertionPointToStart(&block);

  op->setAttr(Op::getOperandSegmentSizeAttr(),
              builder.getDenseI32ArrayAttr(operandSegments));

  // Place the insertion point to the start of the first block.
  builder.setInsertionPointToStart(&block);

  // If it is an unstructured region and is not the outer region of a combined
  // construct, create empty blocks for all evaluations.
  if (eval.lowerAsUnstructured() && !outerCombined)
    Fortran::lower::createEmptyRegionBlocks<mlir::acc::TerminatorOp,
                                            mlir::acc::YieldOp>(
        builder, eval.getNestedEvaluations());

  if (yieldValue) {
    if constexpr (std::is_same_v<Terminator, mlir::acc::YieldOp>) {
      Terminator yieldOp = builder.create<Terminator>(returnLoc, yieldValue);
      yieldValue.getDefiningOp()->moveBefore(yieldOp);
    } else {
      builder.create<Terminator>(returnLoc);
    }
  } else {
    builder.create<Terminator>(returnLoc);
  }
  builder.setInsertionPointToStart(&block);
  return op;
}

static void genAsyncClause(Fortran::lower::AbstractConverter &converter,
                           const Fortran::parser::AccClause::Async *asyncClause,
                           mlir::Value &async, bool &addAsyncAttr,
                           Fortran::lower::StatementContext &stmtCtx) {
  const auto &asyncClauseValue = asyncClause->v;
  if (asyncClauseValue) { // async has a value.
    async = fir::getBase(converter.genExprValue(
        *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx));
  } else {
    addAsyncAttr = true;
  }
}

static void
genAsyncClause(Fortran::lower::AbstractConverter &converter,
               const Fortran::parser::AccClause::Async *asyncClause,
               llvm::SmallVector<mlir::Value> &async,
               llvm::SmallVector<mlir::Attribute> &asyncDeviceTypes,
               llvm::SmallVector<mlir::Attribute> &asyncOnlyDeviceTypes,
               llvm::SmallVector<mlir::Attribute> &deviceTypeAttrs,
               Fortran::lower::StatementContext &stmtCtx) {
  const auto &asyncClauseValue = asyncClause->v;
  if (asyncClauseValue) { // async has a value.
    mlir::Value asyncValue = fir::getBase(converter.genExprValue(
        *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx));
    for (auto deviceTypeAttr : deviceTypeAttrs) {
      async.push_back(asyncValue);
      asyncDeviceTypes.push_back(deviceTypeAttr);
    }
  } else {
    for (auto deviceTypeAttr : deviceTypeAttrs)
      asyncOnlyDeviceTypes.push_back(deviceTypeAttr);
  }
}

static mlir::acc::DeviceType
getDeviceType(Fortran::common::OpenACCDeviceType device) {
  switch (device) {
  case Fortran::common::OpenACCDeviceType::Star:
    return mlir::acc::DeviceType::Star;
  case Fortran::common::OpenACCDeviceType::Default:
    return mlir::acc::DeviceType::Default;
  case Fortran::common::OpenACCDeviceType::Nvidia:
    return mlir::acc::DeviceType::Nvidia;
  case Fortran::common::OpenACCDeviceType::Radeon:
    return mlir::acc::DeviceType::Radeon;
  case Fortran::common::OpenACCDeviceType::Host:
    return mlir::acc::DeviceType::Host;
  case Fortran::common::OpenACCDeviceType::Multicore:
    return mlir::acc::DeviceType::Multicore;
  case Fortran::common::OpenACCDeviceType::None:
    return mlir::acc::DeviceType::None;
  }
  return mlir::acc::DeviceType::None;
}

static void gatherDeviceTypeAttrs(
    fir::FirOpBuilder &builder,
    const Fortran::parser::AccClause::DeviceType *deviceTypeClause,
    llvm::SmallVector<mlir::Attribute> &deviceTypes) {
  const Fortran::parser::AccDeviceTypeExprList &deviceTypeExprList =
      deviceTypeClause->v;
  for (const auto &deviceTypeExpr : deviceTypeExprList.v)
    deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
        builder.getContext(), getDeviceType(deviceTypeExpr.v)));
}

static void genIfClause(Fortran::lower::AbstractConverter &converter,
                        mlir::Location clauseLocation,
                        const Fortran::parser::AccClause::If *ifClause,
                        mlir::Value &ifCond,
                        Fortran::lower::StatementContext &stmtCtx) {
  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Value cond = fir::getBase(converter.genExprValue(
      *Fortran::semantics::GetExpr(ifClause->v), stmtCtx, &clauseLocation));
  ifCond = firOpBuilder.createConvert(clauseLocation, firOpBuilder.getI1Type(),
                                      cond);
}

static void genWaitClause(Fortran::lower::AbstractConverter &converter,
                          const Fortran::parser::AccClause::Wait *waitClause,
                          llvm::SmallVectorImpl<mlir::Value> &operands,
                          mlir::Value &waitDevnum, bool &addWaitAttr,
                          Fortran::lower::StatementContext &stmtCtx) {
  const auto &waitClauseValue = waitClause->v;
  if (waitClauseValue) { // wait has a value.
    const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue;
    const auto &waitList =
        std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    for (const Fortran::parser::ScalarIntExpr &value : waitList) {
      mlir::Value v = fir::getBase(
          converter.genExprValue(*Fortran::semantics::GetExpr(value), stmtCtx));
      operands.push_back(v);
    }

    const auto &waitDevnumValue =
        std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    if (waitDevnumValue)
      waitDevnum = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx));
  } else {
    addWaitAttr = true;
  }
}

static void genWaitClauseWithDeviceType(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::parser::AccClause::Wait *waitClause,
    llvm::SmallVector<mlir::Value> &waitOperands,
    llvm::SmallVector<mlir::Attribute> &waitOperandsDeviceTypes,
    llvm::SmallVector<mlir::Attribute> &waitOnlyDeviceTypes,
    llvm::SmallVector<bool> &hasDevnums,
    llvm::SmallVector<int32_t> &waitOperandsSegments,
    llvm::SmallVector<mlir::Attribute> deviceTypeAttrs,
    Fortran::lower::StatementContext &stmtCtx) {
  const auto &waitClauseValue = waitClause->v;
  if (waitClauseValue) { // wait has a value.
    llvm::SmallVector<mlir::Value> waitValues;

    const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue;
    const auto &waitDevnumValue =
        std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    bool hasDevnum = false;
    if (waitDevnumValue) {
      waitValues.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx)));
      hasDevnum = true;
    }

    const auto &waitList =
        std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    for (const Fortran::parser::ScalarIntExpr &value : waitList) {
      waitValues.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(value), stmtCtx)));
    }

    for (auto deviceTypeAttr : deviceTypeAttrs) {
      for (auto value : waitValues)
        waitOperands.push_back(value);
      waitOperandsDeviceTypes.push_back(deviceTypeAttr);
      waitOperandsSegments.push_back(waitValues.size());
      hasDevnums.push_back(hasDevnum);
    }
  } else {
    for (auto deviceTypeAttr : deviceTypeAttrs)
      waitOnlyDeviceTypes.push_back(deviceTypeAttr);
  }
}

mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder,
                                 const Fortran::semantics::Symbol &ivSym) {
  std::size_t ivTypeSize = ivSym.size();
  if (ivTypeSize == 0)
    llvm::report_fatal_error("unexpected induction variable size");
  // ivTypeSize is in bytes and IntegerType needs to be in bits.
  return builder.getIntegerType(ivTypeSize * 8);
}

static void privatizeIv(Fortran::lower::AbstractConverter &converter,
                        const Fortran::semantics::Symbol &sym,
                        mlir::Location loc,
                        llvm::SmallVector<mlir::Type> &ivTypes,
                        llvm::SmallVector<mlir::Location> &ivLocs,
                        llvm::SmallVector<mlir::Value> &privateOperands,
                        llvm::SmallVector<mlir::Value> &ivPrivate,
                        llvm::SmallVector<mlir::Attribute> &privatizations,
                        bool isDoConcurrent = false) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym);
  ivTypes.push_back(ivTy);
  ivLocs.push_back(loc);
  mlir::Value ivValue = converter.getSymbolAddress(sym);
  if (!ivValue && isDoConcurrent) {
    // DO CONCURRENT induction variables are not mapped yet since they are local
    // to the DO CONCURRENT scope.
    mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
    builder.setInsertionPointToStart(builder.getAllocaBlock());
    ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
    builder.restoreInsertionPoint(insPt);
  }

  std::string recipeName =
      fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
                           Fortran::lower::privatizationRecipePrefix);
  auto recipe = Fortran::lower::createOrGetPrivateRecipe(
      builder, recipeName, loc, ivValue.getType());

  std::stringstream asFortran;
  auto op = createDataEntryOp<mlir::acc::PrivateOp>(
      builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
      mlir::acc::DataClause::acc_private, ivValue.getType(),
      /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});

  privateOperands.push_back(op.getAccPtr());
  privatizations.push_back(mlir::SymbolRefAttr::get(builder.getContext(),
                                                    recipe.getSymName().str()));

  // Map the new private iv to its symbol for the scope of the loop. bindSymbol
  // might create a hlfir.declare op, if so, we map its result in order to
  // use the sym value in the scope.
  converter.bindSymbol(sym, op.getAccPtr());
  auto privateValue = converter.getSymbolAddress(sym);
  if (auto declareOp =
          mlir::dyn_cast<hlfir::DeclareOp>(privateValue.getDefiningOp()))
    privateValue = declareOp.getResults()[0];
  ivPrivate.push_back(privateValue);
}

static mlir::acc::LoopOp createLoopOp(
    Fortran::lower::AbstractConverter &converter,
    mlir::Location currentLocation,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &stmtCtx,
    const Fortran::parser::DoConstruct &outerDoConstruct,
    Fortran::lower::pft::Evaluation &eval,
    const Fortran::parser::AccClauseList &accClauseList,
    std::optional<mlir::acc::CombinedConstructsType> combinedConstructs =
        std::nullopt,
    bool needEarlyReturnHandling = false) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  llvm::SmallVector<mlir::Value> tileOperands, privateOperands, ivPrivate,
      reductionOperands, cacheOperands, vectorOperands, workerNumOperands,
      gangOperands, lowerbounds, upperbounds, steps;
  llvm::SmallVector<mlir::Attribute> privatizations, reductionRecipes;
  llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments;
  llvm::SmallVector<int64_t> collapseValues;

  llvm::SmallVector<mlir::Attribute> gangArgTypes;
  llvm::SmallVector<mlir::Attribute> seqDeviceTypes, independentDeviceTypes,
      autoDeviceTypes, vectorOperandsDeviceTypes, workerNumOperandsDeviceTypes,
      vectorDeviceTypes, workerNumDeviceTypes, tileOperandsDeviceTypes,
      collapseDeviceTypes, gangDeviceTypes, gangOperandsDeviceTypes;

  // device_type attribute is set to `none` until a device_type clause is
  // encountered.
  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None));

  llvm::SmallVector<mlir::Type> ivTypes;
  llvm::SmallVector<mlir::Location> ivLocs;
  llvm::SmallVector<bool> inclusiveBounds;

  llvm::SmallVector<mlir::Location> locs;
  locs.push_back(currentLocation); // Location of the directive
  Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
  bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
  if (isDoConcurrent) {
    locs.push_back(converter.genLocation(
        Fortran::parser::FindSourceLocation(outerDoConstruct)));
    const Fortran::parser::LoopControl *loopControl =
        &*outerDoConstruct.GetLoopControl();
    const auto &concurrent =
        std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
    if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
             .empty())
      TODO(currentLocation, "DO CONCURRENT with locality spec");

    const auto &concurrentHeader =
        std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
    const auto &controls =
        std::get<std::list<Fortran::parser::ConcurrentControl>>(
            concurrentHeader.t);
    for (const auto &control : controls) {
      lowerbounds.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
      upperbounds.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
      if (const auto &expr =
              std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
                  control.t))
        steps.push_back(fir::getBase(converter.genExprValue(
            *Fortran::semantics::GetExpr(*expr), stmtCtx)));
      else // If `step` is not present, assume it is `1`.
        steps.push_back(builder.createIntegerConstant(
            currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

      const auto &name = std::get<Fortran::parser::Name>(control.t);
      privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
                  privateOperands, ivPrivate, privatizations, isDoConcurrent);

      inclusiveBounds.push_back(true);
    }
  } else {
    int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
    for (unsigned i = 0; i < collapseValue; ++i) {
      const Fortran::parser::LoopControl *loopControl;
      if (i == 0) {
        loopControl = &*outerDoConstruct.GetLoopControl();
        locs.push_back(converter.genLocation(
            Fortran::parser::FindSourceLocation(outerDoConstruct)));
      } else {
        auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
        assert(doCons && "expect do construct");
        loopControl = &*doCons->GetLoopControl();
        locs.push_back(converter.genLocation(
            Fortran::parser::FindSourceLocation(*doCons)));
      }

      const Fortran::parser::LoopControl::Bounds *bounds =
          std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
      assert(bounds && "Expected bounds on the loop construct");
      lowerbounds.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
      upperbounds.push_back(fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
      if (bounds->step)
        steps.push_back(fir::getBase(converter.genExprValue(
            *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
      else // If `step` is not present, assume it is `1`.
        steps.push_back(builder.createIntegerConstant(
            currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

      Fortran::semantics::Symbol &ivSym =
          bounds->name.thing.symbol->GetUltimate();
      privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
                  privateOperands, ivPrivate, privatizations);

      inclusiveBounds.push_back(true);

      if (i < collapseValue - 1)
        crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
    }
  }

  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *gangClause =
            std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
      if (gangClause->v) {
        const Fortran::parser::AccGangArgList &x = *gangClause->v;
        mlir::SmallVector<mlir::Value> gangValues;
        mlir::SmallVector<mlir::Attribute> gangArgs;
        for (const Fortran::parser::AccGangArg &gangArg : x.v) {
          if (const auto *num =
                  std::get_if<Fortran::parser::AccGangArg::Num>(&gangArg.u)) {
            gangValues.push_back(fir::getBase(converter.genExprValue(
                *Fortran::semantics::GetExpr(num->v), stmtCtx)));
            gangArgs.push_back(mlir::acc::GangArgTypeAttr::get(
                builder.getContext(), mlir::acc::GangArgType::Num));
          } else if (const auto *staticArg =
                         std::get_if<Fortran::parser::AccGangArg::Static>(
                             &gangArg.u)) {
            const Fortran::parser::AccSizeExpr &sizeExpr = staticArg->v;
            if (sizeExpr.v) {
              gangValues.push_back(fir::getBase(converter.genExprValue(
                  *Fortran::semantics::GetExpr(*sizeExpr.v), stmtCtx)));
            } else {
              // * was passed as value and will be represented as a special
              // constant.
              gangValues.push_back(builder.createIntegerConstant(
                  clauseLocation, builder.getIndexType(), starCst));
            }
            gangArgs.push_back(mlir::acc::GangArgTypeAttr::get(
                builder.getContext(), mlir::acc::GangArgType::Static));
          } else if (const auto *dim =
                         std::get_if<Fortran::parser::AccGangArg::Dim>(
                             &gangArg.u)) {
            gangValues.push_back(fir::getBase(converter.genExprValue(
                *Fortran::semantics::GetExpr(dim->v), stmtCtx)));
            gangArgs.push_back(mlir::acc::GangArgTypeAttr::get(
                builder.getContext(), mlir::acc::GangArgType::Dim));
          }
        }
        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
          for (const auto &pair : llvm::zip(gangValues, gangArgs)) {
            gangOperands.push_back(std::get<0>(pair));
            gangArgTypes.push_back(std::get<1>(pair));
          }
          gangOperandsSegments.push_back(gangValues.size());
          gangOperandsDeviceTypes.push_back(crtDeviceTypeAttr);
        }
      } else {
        for (auto crtDeviceTypeAttr : crtDeviceTypes)
          gangDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (const auto *workerClause =
                   std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
      if (workerClause->v) {
        mlir::Value workerNumValue = fir::getBase(converter.genExprValue(
            *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx));
        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
          workerNumOperands.push_back(workerNumValue);
          workerNumOperandsDeviceTypes.push_back(crtDeviceTypeAttr);
        }
      } else {
        for (auto crtDeviceTypeAttr : crtDeviceTypes)
          workerNumDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (const auto *vectorClause =
                   std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
      if (vectorClause->v) {
        mlir::Value vectorValue = fir::getBase(converter.genExprValue(
            *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx));
        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
          vectorOperands.push_back(vectorValue);
          vectorOperandsDeviceTypes.push_back(crtDeviceTypeAttr);
        }
      } else {
        for (auto crtDeviceTypeAttr : crtDeviceTypes)
          vectorDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (const auto *tileClause =
                   std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
      const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v;
      llvm::SmallVector<mlir::Value> tileValues;
      for (const auto &accTileExpr : accTileExprList.v) {
        const auto &expr =
            std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>(
                accTileExpr.t);
        if (expr) {
          tileValues.push_back(fir::getBase(converter.genExprValue(
              *Fortran::semantics::GetExpr(*expr), stmtCtx)));
        } else {
          // * was passed as value and will be represented as a special
          // constant.
          mlir::Value tileStar = builder.createIntegerConstant(
              clauseLocation, builder.getIntegerType(32), starCst);
          tileValues.push_back(tileStar);
        }
      }
      for (auto crtDeviceTypeAttr : crtDeviceTypes) {
        for (auto value : tileValues)
          tileOperands.push_back(value);
        tileOperandsDeviceTypes.push_back(crtDeviceTypeAttr);
        tileOperandsSegments.push_back(tileValues.size());
      }
    } else if (const auto *privateClause =
                   std::get_if<Fortran::parser::AccClause::Private>(
                       &clause.u)) {
      genPrivatizations<mlir::acc::PrivateRecipeOp>(
          privateClause->v, converter, semanticsContext, stmtCtx,
          privateOperands, privatizations, /*async=*/{},
          /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
    } else if (const auto *reductionClause =
                   std::get_if<Fortran::parser::AccClause::Reduction>(
                       &clause.u)) {
      genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
                    reductionOperands, reductionRecipes, /*async=*/{},
                    /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
    } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        seqDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (std::get_if<Fortran::parser::AccClause::Independent>(
                   &clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        independentDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        autoDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      crtDeviceTypes.clear();
      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
    } else if (const auto *collapseClause =
                   std::get_if<Fortran::parser::AccClause::Collapse>(
                       &clause.u)) {
      const Fortran::parser::AccCollapseArg &arg = collapseClause->v;
      const auto &force = std::get<bool>(arg.t);
      if (force)
        TODO(clauseLocation, "OpenACC collapse force modifier");

      const auto &intExpr =
          std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t);
      const auto *expr = Fortran::semantics::GetExpr(intExpr);
      const std::optional<int64_t> collapseValue =
          Fortran::evaluate::ToInt64(*expr);
      assert(collapseValue && "expect integer value for the collapse clause");

      for (auto crtDeviceTypeAttr : crtDeviceTypes) {
        collapseValues.push_back(*collapseValue);
        collapseDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value> operands;
  llvm::SmallVector<int32_t> operandSegments;
  addOperands(operands, operandSegments, lowerbounds);
  addOperands(operands, operandSegments, upperbounds);
  addOperands(operands, operandSegments, steps);
  addOperands(operands, operandSegments, gangOperands);
  addOperands(operands, operandSegments, workerNumOperands);
  addOperands(operands, operandSegments, vectorOperands);
  addOperands(operands, operandSegments, tileOperands);
  addOperands(operands, operandSegments, cacheOperands);
  addOperands(operands, operandSegments, privateOperands);
  addOperands(operands, operandSegments, reductionOperands);

  llvm::SmallVector<mlir::Type> retTy;
  mlir::Value yieldValue;
  if (needEarlyReturnHandling) {
    mlir::Type i1Ty = builder.getI1Type();
    yieldValue = builder.createIntegerConstant(currentLocation, i1Ty, 0);
    retTy.push_back(i1Ty);
  }

  auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
      builder, builder.getFusedLoc(locs), currentLocation, eval, operands,
      operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes,
      ivLocs);

  for (auto [arg, value] : llvm::zip(
           loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate))
    builder.create<fir::StoreOp>(currentLocation, arg, value);

  loopOp.setInclusiveUpperbound(inclusiveBounds);

  if (!gangDeviceTypes.empty())
    loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes));
  if (!gangArgTypes.empty())
    loopOp.setGangOperandsArgTypeAttr(builder.getArrayAttr(gangArgTypes));
  if (!gangOperandsSegments.empty())
    loopOp.setGangOperandsSegmentsAttr(
        builder.getDenseI32ArrayAttr(gangOperandsSegments));
  if (!gangOperandsDeviceTypes.empty())
    loopOp.setGangOperandsDeviceTypeAttr(
        builder.getArrayAttr(gangOperandsDeviceTypes));

  if (!workerNumDeviceTypes.empty())
    loopOp.setWorkerAttr(builder.getArrayAttr(workerNumDeviceTypes));
  if (!workerNumOperandsDeviceTypes.empty())
    loopOp.setWorkerNumOperandsDeviceTypeAttr(
        builder.getArrayAttr(workerNumOperandsDeviceTypes));

  if (!vectorDeviceTypes.empty())
    loopOp.setVectorAttr(builder.getArrayAttr(vectorDeviceTypes));
  if (!vectorOperandsDeviceTypes.empty())
    loopOp.setVectorOperandsDeviceTypeAttr(
        builder.getArrayAttr(vectorOperandsDeviceTypes));

  if (!tileOperandsDeviceTypes.empty())
    loopOp.setTileOperandsDeviceTypeAttr(
        builder.getArrayAttr(tileOperandsDeviceTypes));
  if (!tileOperandsSegments.empty())
    loopOp.setTileOperandsSegmentsAttr(
        builder.getDenseI32ArrayAttr(tileOperandsSegments));

  if (!seqDeviceTypes.empty())
    loopOp.setSeqAttr(builder.getArrayAttr(seqDeviceTypes));
  if (!independentDeviceTypes.empty())
    loopOp.setIndependentAttr(builder.getArrayAttr(independentDeviceTypes));
  if (!autoDeviceTypes.empty())
    loopOp.setAuto_Attr(builder.getArrayAttr(autoDeviceTypes));

  if (!privatizations.empty())
    loopOp.setPrivatizationsAttr(
        mlir::ArrayAttr::get(builder.getContext(), privatizations));

  if (!reductionRecipes.empty())
    loopOp.setReductionRecipesAttr(
        mlir::ArrayAttr::get(builder.getContext(), reductionRecipes));

  if (!collapseValues.empty())
    loopOp.setCollapseAttr(builder.getI64ArrayAttr(collapseValues));
  if (!collapseDeviceTypes.empty())
    loopOp.setCollapseDeviceTypeAttr(builder.getArrayAttr(collapseDeviceTypes));

  if (combinedConstructs)
    loopOp.setCombinedAttr(mlir::acc::CombinedConstructsTypeAttr::get(
        builder.getContext(), *combinedConstructs));

  // TODO: retrieve directives from NonLabelDoStmt pft::Evaluation, and add them
  // as attribute to the acc.loop as an extra attribute. It is not quite clear
  // how useful these $dir are in acc contexts, but they could still provide
  // more information about the loop acc codegen. They can be obtained by
  // looking for the first lexicalSuccessor of eval that is a NonLabelDoStmt,
  // and using the related `dirs` member.

  return loopOp;
}

static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
  bool hasReturnStmt = false;
  for (auto &e : eval.getNestedEvaluations()) {
    e.visit(Fortran::common::visitors{
        [&](const Fortran::parser::ReturnStmt &) { hasReturnStmt = true; },
        [&](const auto &s) {},
    });
    if (e.hasNestedEvaluations())
      hasReturnStmt = hasEarlyReturn(e);
  }
  return hasReturnStmt;
}

static mlir::Value
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::semantics::SemanticsContext &semanticsContext,
       Fortran::lower::pft::Evaluation &eval,
       const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {

  const auto &beginLoopDirective =
      std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t);
  const auto &loopDirective =
      std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t);

  bool needEarlyExitHandling = false;
  if (eval.lowerAsUnstructured())
    needEarlyExitHandling = hasEarlyReturn(eval);

  mlir::Location currentLocation =
      converter.genLocation(beginLoopDirective.source);
  Fortran::lower::StatementContext stmtCtx;

  assert(loopDirective.v == llvm::acc::ACCD_loop &&
         "Unsupported OpenACC loop construct");
  (void)loopDirective;

  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t);
  const auto &outerDoConstruct =
      std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
  auto loopOp = createLoopOp(converter, currentLocation, semanticsContext,
                             stmtCtx, *outerDoConstruct, eval, accClauseList,
                             /*combinedConstructs=*/{}, needEarlyExitHandling);
  if (needEarlyExitHandling)
    return loopOp.getResult(0);

  return mlir::Value{};
}

template <typename Op, typename Clause>
static void genDataOperandOperationsWithModifier(
    const Clause *x, Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &stmtCtx,
    Fortran::parser::AccDataModifier::Modifier mod,
    llvm::SmallVectorImpl<mlir::Value> &dataClauseOperands,
    const mlir::acc::DataClause clause,
    const mlir::acc::DataClause clauseWithModifier,
    llvm::ArrayRef<mlir::Value> async,
    llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
    llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
    bool setDeclareAttr = false) {
  const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v;
  const auto &accObjectList =
      std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
  const auto &modifier =
      std::get<std::optional<Fortran::parser::AccDataModifier>>(
          listWithModifier.t);
  mlir::acc::DataClause dataClause =
      (modifier && (*modifier).v == mod) ? clauseWithModifier : clause;
  genDataOperandOperations<Op>(accObjectList, converter, semanticsContext,
                               stmtCtx, dataClauseOperands, dataClause,
                               /*structured=*/true, /*implicit=*/false, async,
                               asyncDeviceTypes, asyncOnlyDeviceTypes,
                               setDeclareAttr);
}

template <typename Op>
static Op createComputeOp(
    Fortran::lower::AbstractConverter &converter,
    mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &stmtCtx,
    const Fortran::parser::AccClauseList &accClauseList,
    std::optional<mlir::acc::CombinedConstructsType> combinedConstructs =
        std::nullopt) {

  // Parallel operation operands
  mlir::Value ifCond;
  mlir::Value selfCond;
  llvm::SmallVector<mlir::Value> waitOperands, attachEntryOperands,
      copyEntryOperands, copyoutEntryOperands, createEntryOperands,
      dataClauseOperands, numGangs, numWorkers, vectorLength, async;
  llvm::SmallVector<mlir::Attribute> numGangsDeviceTypes, numWorkersDeviceTypes,
      vectorLengthDeviceTypes, asyncDeviceTypes, asyncOnlyDeviceTypes,
      waitOperandsDeviceTypes, waitOnlyDeviceTypes;
  llvm::SmallVector<int32_t> numGangsSegments, waitOperandsSegments;
  llvm::SmallVector<bool> hasWaitDevnums;

  llvm::SmallVector<mlir::Value> reductionOperands, privateOperands,
      firstprivateOperands;
  llvm::SmallVector<mlir::Attribute> privatizations, firstPrivatizations,
      reductionRecipes;

  // Self clause has optional values but can be present with
  // no value as well. When there is no value, the op has an attribute to
  // represent the clause.
  bool addSelfAttr = false;

  bool hasDefaultNone = false;
  bool hasDefaultPresent = false;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // device_type attribute is set to `none` until a device_type clause is
  // encountered.
  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
  auto crtDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None);
  crtDeviceTypes.push_back(crtDeviceTypeAttr);

  // Lower clauses values mapped to operands and array attributes.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.

  // Process the clauses that may have a specified device_type first.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *asyncClause =
            std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, async, asyncDeviceTypes,
                     asyncOnlyDeviceTypes, crtDeviceTypes, stmtCtx);
    } else if (const auto *waitClause =
                   std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
      genWaitClauseWithDeviceType(converter, waitClause, waitOperands,
                                  waitOperandsDeviceTypes, waitOnlyDeviceTypes,
                                  hasWaitDevnums, waitOperandsSegments,
                                  crtDeviceTypes, stmtCtx);
    } else if (const auto *numGangsClause =
                   std::get_if<Fortran::parser::AccClause::NumGangs>(
                       &clause.u)) {
      llvm::SmallVector<mlir::Value> numGangValues;
      for (const Fortran::parser::ScalarIntExpr &expr : numGangsClause->v)
        numGangValues.push_back(fir::getBase(converter.genExprValue(
            *Fortran::semantics::GetExpr(expr), stmtCtx)));
      for (auto crtDeviceTypeAttr : crtDeviceTypes) {
        for (auto value : numGangValues)
          numGangs.push_back(value);
        numGangsDeviceTypes.push_back(crtDeviceTypeAttr);
        numGangsSegments.push_back(numGangValues.size());
      }
    } else if (const auto *numWorkersClause =
                   std::get_if<Fortran::parser::AccClause::NumWorkers>(
                       &clause.u)) {
      mlir::Value numWorkerValue = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(numWorkersClause->v), stmtCtx));
      for (auto crtDeviceTypeAttr : crtDeviceTypes) {
        numWorkers.push_back(numWorkerValue);
        numWorkersDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (const auto *vectorLengthClause =
                   std::get_if<Fortran::parser::AccClause::VectorLength>(
                       &clause.u)) {
      mlir::Value vectorLengthValue = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(vectorLengthClause->v), stmtCtx));
      for (auto crtDeviceTypeAttr : crtDeviceTypes) {
        vectorLength.push_back(vectorLengthValue);
        vectorLengthDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      crtDeviceTypes.clear();
      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
    }
  }

  // Process the clauses independent of device_type.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *selfClause =
                   std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) {
      const std::optional<Fortran::parser::AccSelfClause> &accSelfClause =
          selfClause->v;
      if (accSelfClause) {
        if (const auto *optCondition =
                std::get_if<std::optional<Fortran::parser::ScalarLogicalExpr>>(
                    &(*accSelfClause).u)) {
          if (*optCondition) {
            mlir::Value cond = fir::getBase(converter.genExprValue(
                *Fortran::semantics::GetExpr(*optCondition), stmtCtx));
            selfCond = builder.createConvert(clauseLocation,
                                             builder.getI1Type(), cond);
          }
        } else if (const auto *accClauseList =
                       std::get_if<Fortran::parser::AccObjectList>(
                           &(*accSelfClause).u)) {
          // TODO This would be nicer to be done in canonicalization step.
          if (accClauseList->v.size() == 1) {
            const auto &accObject = accClauseList->v.front();
            if (const auto *designator =
                    std::get_if<Fortran::parser::Designator>(&accObject.u)) {
              if (const auto *name =
                      Fortran::semantics::getDesignatorNameIfDataRef(
                          *designator)) {
                auto cond = converter.getSymbolAddress(*name->symbol);
                selfCond = builder.createConvert(clauseLocation,
                                                 builder.getI1Type(), cond);
              }
            }
          }
        }
      } else {
        addSelfAttr = true;
      }
    } else if (const auto *copyClause =
                   std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperations<mlir::acc::CopyinOp>(
          copyClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_copy,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                               dataClauseOperands.end());
    } else if (const auto *copyinClause =
                   std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) {
      genDataOperandOperationsWithModifier<mlir::acc::CopyinOp,
                                           Fortran::parser::AccClause::Copyin>(
          copyinClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::ReadOnly,
          dataClauseOperands, mlir::acc::DataClause::acc_copyin,
          mlir::acc::DataClause::acc_copyin_readonly, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *copyoutClause =
                   std::get_if<Fortran::parser::AccClause::Copyout>(
                       &clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperationsWithModifier<mlir::acc::CreateOp,
                                           Fortran::parser::AccClause::Copyout>(
          copyoutClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::ReadOnly,
          dataClauseOperands, mlir::acc::DataClause::acc_copyout,
          mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                  dataClauseOperands.end());
    } else if (const auto *createClause =
                   std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperationsWithModifier<mlir::acc::CreateOp,
                                           Fortran::parser::AccClause::Create>(
          createClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands,
          mlir::acc::DataClause::acc_create,
          mlir::acc::DataClause::acc_create_zero, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      createEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
    } else if (const auto *noCreateClause =
                   std::get_if<Fortran::parser::AccClause::NoCreate>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::NoCreateOp>(
          noCreateClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_no_create,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *presentClause =
                   std::get_if<Fortran::parser::AccClause::Present>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::PresentOp>(
          presentClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_present,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *devicePtrClause =
                   std::get_if<Fortran::parser::AccClause::Deviceptr>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::DevicePtrOp>(
          devicePtrClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_deviceptr,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *attachClause =
                   std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperations<mlir::acc::AttachOp>(
          attachClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_attach,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      attachEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
    } else if (const auto *privateClause =
                   std::get_if<Fortran::parser::AccClause::Private>(
                       &clause.u)) {
      if (!combinedConstructs)
        genPrivatizations<mlir::acc::PrivateRecipeOp>(
            privateClause->v, converter, semanticsContext, stmtCtx,
            privateOperands, privatizations, async, asyncDeviceTypes,
            asyncOnlyDeviceTypes);
    } else if (const auto *firstprivateClause =
                   std::get_if<Fortran::parser::AccClause::Firstprivate>(
                       &clause.u)) {
      genPrivatizations<mlir::acc::FirstprivateRecipeOp>(
          firstprivateClause->v, converter, semanticsContext, stmtCtx,
          firstprivateOperands, firstPrivatizations, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *reductionClause =
                   std::get_if<Fortran::parser::AccClause::Reduction>(
                       &clause.u)) {
      // A reduction clause on a combined construct is treated as if it appeared
      // on the loop construct. So don't generate a reduction clause when it is
      // combined - delay it to the loop. However, a reduction clause on a
      // combined construct implies a copy clause so issue an implicit copy
      // instead.
      if (!combinedConstructs) {
        genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
                      reductionOperands, reductionRecipes, async,
                      asyncDeviceTypes, asyncOnlyDeviceTypes);
      } else {
        auto crtDataStart = dataClauseOperands.size();
        genDataOperandOperations<mlir::acc::CopyinOp>(
            std::get<Fortran::parser::AccObjectList>(reductionClause->v.t),
            converter, semanticsContext, stmtCtx, dataClauseOperands,
            mlir::acc::DataClause::acc_reduction,
            /*structured=*/true, /*implicit=*/true, async, asyncDeviceTypes,
            asyncOnlyDeviceTypes);
        copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
      }
    } else if (const auto *defaultClause =
                   std::get_if<Fortran::parser::AccClause::Default>(
                       &clause.u)) {
      if ((defaultClause->v).v == llvm::acc::DefaultValue::ACC_Default_none)
        hasDefaultNone = true;
      else if ((defaultClause->v).v ==
               llvm::acc::DefaultValue::ACC_Default_present)
        hasDefaultPresent = true;
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value, 8> operands;
  llvm::SmallVector<int32_t, 8> operandSegments;
  addOperands(operands, operandSegments, async);
  addOperands(operands, operandSegments, waitOperands);
  if constexpr (!std::is_same_v<Op, mlir::acc::SerialOp>) {
    addOperands(operands, operandSegments, numGangs);
    addOperands(operands, operandSegments, numWorkers);
    addOperands(operands, operandSegments, vectorLength);
  }
  addOperand(operands, operandSegments, ifCond);
  addOperand(operands, operandSegments, selfCond);
  if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>) {
    addOperands(operands, operandSegments, reductionOperands);
    addOperands(operands, operandSegments, privateOperands);
    addOperands(operands, operandSegments, firstprivateOperands);
  }
  addOperands(operands, operandSegments, dataClauseOperands);

  Op computeOp;
  if constexpr (std::is_same_v<Op, mlir::acc::KernelsOp>)
    computeOp = createRegionOp<Op, mlir::acc::TerminatorOp>(
        builder, currentLocation, currentLocation, eval, operands,
        operandSegments, /*outerCombined=*/combinedConstructs.has_value());
  else
    computeOp = createRegionOp<Op, mlir::acc::YieldOp>(
        builder, currentLocation, currentLocation, eval, operands,
        operandSegments, /*outerCombined=*/combinedConstructs.has_value());

  if (addSelfAttr)
    computeOp.setSelfAttrAttr(builder.getUnitAttr());

  if (hasDefaultNone)
    computeOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::None);
  if (hasDefaultPresent)
    computeOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::Present);

  if constexpr (!std::is_same_v<Op, mlir::acc::SerialOp>) {
    if (!numWorkersDeviceTypes.empty())
      computeOp.setNumWorkersDeviceTypeAttr(
          mlir::ArrayAttr::get(builder.getContext(), numWorkersDeviceTypes));
    if (!vectorLengthDeviceTypes.empty())
      computeOp.setVectorLengthDeviceTypeAttr(
          mlir::ArrayAttr::get(builder.getContext(), vectorLengthDeviceTypes));
    if (!numGangsDeviceTypes.empty())
      computeOp.setNumGangsDeviceTypeAttr(
          mlir::ArrayAttr::get(builder.getContext(), numGangsDeviceTypes));
    if (!numGangsSegments.empty())
      computeOp.setNumGangsSegmentsAttr(
          builder.getDenseI32ArrayAttr(numGangsSegments));
  }
  if (!asyncDeviceTypes.empty())
    computeOp.setAsyncOperandsDeviceTypeAttr(
        builder.getArrayAttr(asyncDeviceTypes));
  if (!asyncOnlyDeviceTypes.empty())
    computeOp.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes));

  if (!waitOperandsDeviceTypes.empty())
    computeOp.setWaitOperandsDeviceTypeAttr(
        builder.getArrayAttr(waitOperandsDeviceTypes));
  if (!waitOperandsSegments.empty())
    computeOp.setWaitOperandsSegmentsAttr(
        builder.getDenseI32ArrayAttr(waitOperandsSegments));
  if (!hasWaitDevnums.empty())
    computeOp.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasWaitDevnums));
  if (!waitOnlyDeviceTypes.empty())
    computeOp.setWaitOnlyAttr(builder.getArrayAttr(waitOnlyDeviceTypes));

  if constexpr (!std::is_same_v<Op, mlir::acc::KernelsOp>) {
    if (!privatizations.empty())
      computeOp.setPrivatizationsAttr(
          mlir::ArrayAttr::get(builder.getContext(), privatizations));
    if (!reductionRecipes.empty())
      computeOp.setReductionRecipesAttr(
          mlir::ArrayAttr::get(builder.getContext(), reductionRecipes));
    if (!firstPrivatizations.empty())
      computeOp.setFirstprivatizationsAttr(
          mlir::ArrayAttr::get(builder.getContext(), firstPrivatizations));
  }

  if (combinedConstructs)
    computeOp.setCombinedAttr(builder.getUnitAttr());

  auto insPt = builder.saveInsertionPoint();
  builder.setInsertionPointAfter(computeOp);

  // Create the exit operations after the region.
  genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
      builder, copyEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
      builder, copyoutEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
      builder, attachEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
      builder, createEntryOperands, /*structured=*/true);

  builder.restoreInsertionPoint(insPt);
  return computeOp;
}

static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
                         mlir::Location currentLocation,
                         Fortran::lower::pft::Evaluation &eval,
                         Fortran::semantics::SemanticsContext &semanticsContext,
                         Fortran::lower::StatementContext &stmtCtx,
                         const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond;
  llvm::SmallVector<mlir::Value> attachEntryOperands, createEntryOperands,
      copyEntryOperands, copyoutEntryOperands, dataClauseOperands, waitOperands,
      async;
  llvm::SmallVector<mlir::Attribute> asyncDeviceTypes, asyncOnlyDeviceTypes,
      waitOperandsDeviceTypes, waitOnlyDeviceTypes;
  llvm::SmallVector<int32_t> waitOperandsSegments;
  llvm::SmallVector<bool> hasWaitDevnums;

  bool hasDefaultNone = false;
  bool hasDefaultPresent = false;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // device_type attribute is set to `none` until a device_type clause is
  // encountered.
  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None));

  // Lower clauses values mapped to operands and array attributes.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.

  // Process the clauses that may have a specified device_type first.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *asyncClause =
            std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, async, asyncDeviceTypes,
                     asyncOnlyDeviceTypes, crtDeviceTypes, stmtCtx);
    } else if (const auto *waitClause =
                   std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
      genWaitClauseWithDeviceType(converter, waitClause, waitOperands,
                                  waitOperandsDeviceTypes, waitOnlyDeviceTypes,
                                  hasWaitDevnums, waitOperandsSegments,
                                  crtDeviceTypes, stmtCtx);
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      crtDeviceTypes.clear();
      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
    }
  }

  // Process the clauses independent of device_type.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *copyClause =
                   std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperations<mlir::acc::CopyinOp>(
          copyClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_copy,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                               dataClauseOperands.end());
    } else if (const auto *copyinClause =
                   std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) {
      genDataOperandOperationsWithModifier<mlir::acc::CopyinOp,
                                           Fortran::parser::AccClause::Copyin>(
          copyinClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::ReadOnly,
          dataClauseOperands, mlir::acc::DataClause::acc_copyin,
          mlir::acc::DataClause::acc_copyin_readonly, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *copyoutClause =
                   std::get_if<Fortran::parser::AccClause::Copyout>(
                       &clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperationsWithModifier<mlir::acc::CreateOp,
                                           Fortran::parser::AccClause::Copyout>(
          copyoutClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands,
          mlir::acc::DataClause::acc_copyout,
          mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                  dataClauseOperands.end());
    } else if (const auto *createClause =
                   std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperationsWithModifier<mlir::acc::CreateOp,
                                           Fortran::parser::AccClause::Create>(
          createClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands,
          mlir::acc::DataClause::acc_create,
          mlir::acc::DataClause::acc_create_zero, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      createEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
    } else if (const auto *noCreateClause =
                   std::get_if<Fortran::parser::AccClause::NoCreate>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::NoCreateOp>(
          noCreateClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_no_create,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *presentClause =
                   std::get_if<Fortran::parser::AccClause::Present>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::PresentOp>(
          presentClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_present,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *deviceptrClause =
                   std::get_if<Fortran::parser::AccClause::Deviceptr>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::DevicePtrOp>(
          deviceptrClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_deviceptr,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *attachClause =
                   std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDataOperandOperations<mlir::acc::AttachOp>(
          attachClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_attach,
          /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
      attachEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
    } else if(const auto *defaultClause = 
                  std::get_if<Fortran::parser::AccClause::Default>(&clause.u)) {
      if ((defaultClause->v).v == llvm::acc::DefaultValue::ACC_Default_none)
        hasDefaultNone = true;
      else if ((defaultClause->v).v == llvm::acc::DefaultValue::ACC_Default_present)
        hasDefaultPresent = true;
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value> operands;
  llvm::SmallVector<int32_t> operandSegments;
  addOperand(operands, operandSegments, ifCond);
  addOperands(operands, operandSegments, async);
  addOperands(operands, operandSegments, waitOperands);
  addOperands(operands, operandSegments, dataClauseOperands);

  if (dataClauseOperands.empty() && !hasDefaultNone && !hasDefaultPresent)
    return;

  auto dataOp = createRegionOp<mlir::acc::DataOp, mlir::acc::TerminatorOp>(
      builder, currentLocation, currentLocation, eval, operands,
      operandSegments);

  if (!asyncDeviceTypes.empty())
    dataOp.setAsyncOperandsDeviceTypeAttr(
        builder.getArrayAttr(asyncDeviceTypes));
  if (!asyncOnlyDeviceTypes.empty())
    dataOp.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes));
  if (!waitOperandsDeviceTypes.empty())
    dataOp.setWaitOperandsDeviceTypeAttr(
        builder.getArrayAttr(waitOperandsDeviceTypes));
  if (!waitOperandsSegments.empty())
    dataOp.setWaitOperandsSegmentsAttr(
        builder.getDenseI32ArrayAttr(waitOperandsSegments));
  if (!hasWaitDevnums.empty())
    dataOp.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasWaitDevnums));
  if (!waitOnlyDeviceTypes.empty())
    dataOp.setWaitOnlyAttr(builder.getArrayAttr(waitOnlyDeviceTypes));

  if (hasDefaultNone)
    dataOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::None);
  if (hasDefaultPresent)
    dataOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::Present);

  auto insPt = builder.saveInsertionPoint();
  builder.setInsertionPointAfter(dataOp);

  // Create the exit operations after the region.
  genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
      builder, copyEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
      builder, copyoutEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
      builder, attachEntryOperands, /*structured=*/true);
  genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
      builder, createEntryOperands, /*structured=*/true);

  builder.restoreInsertionPoint(insPt);
}

static void
genACCHostDataOp(Fortran::lower::AbstractConverter &converter,
                 mlir::Location currentLocation,
                 Fortran::lower::pft::Evaluation &eval,
                 Fortran::semantics::SemanticsContext &semanticsContext,
                 Fortran::lower::StatementContext &stmtCtx,
                 const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond;
  llvm::SmallVector<mlir::Value> dataOperands;
  bool addIfPresentAttr = false;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *useDevice =
                   std::get_if<Fortran::parser::AccClause::UseDevice>(
                       &clause.u)) {
      genDataOperandOperations<mlir::acc::UseDeviceOp>(
          useDevice->v, converter, semanticsContext, stmtCtx, dataOperands,
          mlir::acc::DataClause::acc_use_device,
          /*structured=*/true, /*implicit=*/false, /*async=*/{},
          /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
    } else if (std::get_if<Fortran::parser::AccClause::IfPresent>(&clause.u)) {
      addIfPresentAttr = true;
    }
  }

  if (ifCond) {
    if (auto cst =
            mlir::dyn_cast<mlir::arith::ConstantOp>(ifCond.getDefiningOp()))
      if (auto boolAttr = mlir::dyn_cast<mlir::BoolAttr>(cst.getValue())) {
        if (boolAttr.getValue()) {
          // get rid of the if condition if it is always true.
          ifCond = mlir::Value();
        } else {
          // Do not generate the acc.host_data op if the if condition is always
          // false.
          return;
        }
      }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value> operands;
  llvm::SmallVector<int32_t> operandSegments;
  addOperand(operands, operandSegments, ifCond);
  addOperands(operands, operandSegments, dataOperands);

  auto hostDataOp =
      createRegionOp<mlir::acc::HostDataOp, mlir::acc::TerminatorOp>(
          builder, currentLocation, currentLocation, eval, operands,
          operandSegments);

  if (addIfPresentAttr)
    hostDataOp.setIfPresentAttr(builder.getUnitAttr());
}

static void
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::semantics::SemanticsContext &semanticsContext,
       Fortran::lower::pft::Evaluation &eval,
       const Fortran::parser::OpenACCBlockConstruct &blockConstruct) {
  const auto &beginBlockDirective =
      std::get<Fortran::parser::AccBeginBlockDirective>(blockConstruct.t);
  const auto &blockDirective =
      std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(beginBlockDirective.t);

  mlir::Location currentLocation = converter.genLocation(blockDirective.source);
  Fortran::lower::StatementContext stmtCtx;

  if (blockDirective.v == llvm::acc::ACCD_parallel) {
    createComputeOp<mlir::acc::ParallelOp>(converter, currentLocation, eval,
                                           semanticsContext, stmtCtx,
                                           accClauseList);
  } else if (blockDirective.v == llvm::acc::ACCD_data) {
    genACCDataOp(converter, currentLocation, eval, semanticsContext, stmtCtx,
                 accClauseList);
  } else if (blockDirective.v == llvm::acc::ACCD_serial) {
    createComputeOp<mlir::acc::SerialOp>(converter, currentLocation, eval,
                                         semanticsContext, stmtCtx,
                                         accClauseList);
  } else if (blockDirective.v == llvm::acc::ACCD_kernels) {
    createComputeOp<mlir::acc::KernelsOp>(converter, currentLocation, eval,
                                          semanticsContext, stmtCtx,
                                          accClauseList);
  } else if (blockDirective.v == llvm::acc::ACCD_host_data) {
    genACCHostDataOp(converter, currentLocation, eval, semanticsContext,
                     stmtCtx, accClauseList);
  }
}

static void
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::semantics::SemanticsContext &semanticsContext,
       Fortran::lower::pft::Evaluation &eval,
       const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) {
  const auto &beginCombinedDirective =
      std::get<Fortran::parser::AccBeginCombinedDirective>(combinedConstruct.t);
  const auto &combinedDirective =
      std::get<Fortran::parser::AccCombinedDirective>(beginCombinedDirective.t);
  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(beginCombinedDirective.t);
  const auto &outerDoConstruct =
      std::get<std::optional<Fortran::parser::DoConstruct>>(
          combinedConstruct.t);

  mlir::Location currentLocation =
      converter.genLocation(beginCombinedDirective.source);
  Fortran::lower::StatementContext stmtCtx;

  if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
    createComputeOp<mlir::acc::KernelsOp>(
        converter, currentLocation, eval, semanticsContext, stmtCtx,
        accClauseList, mlir::acc::CombinedConstructsType::KernelsLoop);
    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
                 *outerDoConstruct, eval, accClauseList,
                 mlir::acc::CombinedConstructsType::KernelsLoop);
  } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) {
    createComputeOp<mlir::acc::ParallelOp>(
        converter, currentLocation, eval, semanticsContext, stmtCtx,
        accClauseList, mlir::acc::CombinedConstructsType::ParallelLoop);
    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
                 *outerDoConstruct, eval, accClauseList,
                 mlir::acc::CombinedConstructsType::ParallelLoop);
  } else if (combinedDirective.v == llvm::acc::ACCD_serial_loop) {
    createComputeOp<mlir::acc::SerialOp>(
        converter, currentLocation, eval, semanticsContext, stmtCtx,
        accClauseList, mlir::acc::CombinedConstructsType::SerialLoop);
    createLoopOp(converter, currentLocation, semanticsContext, stmtCtx,
                 *outerDoConstruct, eval, accClauseList,
                 mlir::acc::CombinedConstructsType::SerialLoop);
  } else {
    llvm::report_fatal_error("Unknown combined construct encountered");
  }
}

static void
genACCEnterDataOp(Fortran::lower::AbstractConverter &converter,
                  mlir::Location currentLocation,
                  Fortran::semantics::SemanticsContext &semanticsContext,
                  Fortran::lower::StatementContext &stmtCtx,
                  const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond, async, waitDevnum;
  llvm::SmallVector<mlir::Value> waitOperands, dataClauseOperands;

  // Async, wait and self clause have optional values but can be present with
  // no value as well. When there is no value, the op has an attribute to
  // represent the clause.
  bool addAsyncAttr = false;
  bool addWaitAttr = false;

  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

  // Lower clauses values mapped to operands.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.

  // Process the async clause first.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *asyncClause =
            std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx);
    }
  }

  // The async clause of 'enter data' applies to all device types,
  // so propagate the async clause to copyin/create/attach ops
  // as if it is an async clause without preceding device_type clause.
  llvm::SmallVector<mlir::Attribute> asyncDeviceTypes, asyncOnlyDeviceTypes;
  llvm::SmallVector<mlir::Value> asyncValues;
  auto noneDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get(
      firOpBuilder.getContext(), mlir::acc::DeviceType::None);
  if (addAsyncAttr) {
    asyncOnlyDeviceTypes.push_back(noneDeviceTypeAttr);
  } else if (async) {
    asyncValues.push_back(async);
    asyncDeviceTypes.push_back(noneDeviceTypeAttr);
  }

  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *waitClause =
                   std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
      genWaitClause(converter, waitClause, waitOperands, waitDevnum,
                    addWaitAttr, stmtCtx);
    } else if (const auto *copyinClause =
                   std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          copyinClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      genDataOperandOperations<mlir::acc::CopyinOp>(
          accObjectList, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_copyin, false,
          /*implicit=*/false, asyncValues, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *createClause =
                   std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          createClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      const auto &modifier =
          std::get<std::optional<Fortran::parser::AccDataModifier>>(
              listWithModifier.t);
      mlir::acc::DataClause clause = mlir::acc::DataClause::acc_create;
      if (modifier &&
          (*modifier).v == Fortran::parser::AccDataModifier::Modifier::Zero)
        clause = mlir::acc::DataClause::acc_create_zero;
      genDataOperandOperations<mlir::acc::CreateOp>(
          accObjectList, converter, semanticsContext, stmtCtx,
          dataClauseOperands, clause, false, /*implicit=*/false, asyncValues,
          asyncDeviceTypes, asyncOnlyDeviceTypes);
    } else if (const auto *attachClause =
                   std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) {
      genDataOperandOperations<mlir::acc::AttachOp>(
          attachClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_attach, false,
          /*implicit=*/false, asyncValues, asyncDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (!std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      llvm::report_fatal_error(
          "Unknown clause in ENTER DATA directive lowering");
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value, 16> operands;
  llvm::SmallVector<int32_t, 8> operandSegments;
  addOperand(operands, operandSegments, ifCond);
  addOperand(operands, operandSegments, async);
  addOperand(operands, operandSegments, waitDevnum);
  addOperands(operands, operandSegments, waitOperands);
  addOperands(operands, operandSegments, dataClauseOperands);

  mlir::acc::EnterDataOp enterDataOp = createSimpleOp<mlir::acc::EnterDataOp>(
      firOpBuilder, currentLocation, operands, operandSegments);

  if (addAsyncAttr)
    enterDataOp.setAsyncAttr(firOpBuilder.getUnitAttr());
  if (addWaitAttr)
    enterDataOp.setWaitAttr(firOpBuilder.getUnitAttr());
}

static void
genACCExitDataOp(Fortran::lower::AbstractConverter &converter,
                 mlir::Location currentLocation,
                 Fortran::semantics::SemanticsContext &semanticsContext,
                 Fortran::lower::StatementContext &stmtCtx,
                 const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond, async, waitDevnum;
  llvm::SmallVector<mlir::Value> waitOperands, dataClauseOperands,
      copyoutOperands, deleteOperands, detachOperands;

  // Async and wait clause have optional values but can be present with
  // no value as well. When there is no value, the op has an attribute to
  // represent the clause.
  bool addAsyncAttr = false;
  bool addWaitAttr = false;
  bool addFinalizeAttr = false;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // Lower clauses values mapped to operands.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.

  // Process the async clause first.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *asyncClause =
            std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx);
    }
  }

  // The async clause of 'exit data' applies to all device types,
  // so propagate the async clause to copyin/create/attach ops
  // as if it is an async clause without preceding device_type clause.
  llvm::SmallVector<mlir::Attribute> asyncDeviceTypes, asyncOnlyDeviceTypes;
  llvm::SmallVector<mlir::Value> asyncValues;
  auto noneDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None);
  if (addAsyncAttr) {
    asyncOnlyDeviceTypes.push_back(noneDeviceTypeAttr);
  } else if (async) {
    asyncValues.push_back(async);
    asyncDeviceTypes.push_back(noneDeviceTypeAttr);
  }

  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *waitClause =
                   std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
      genWaitClause(converter, waitClause, waitOperands, waitDevnum,
                    addWaitAttr, stmtCtx);
    } else if (const auto *copyoutClause =
                   std::get_if<Fortran::parser::AccClause::Copyout>(
                       &clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          copyoutClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      genDataOperandOperations<mlir::acc::GetDevicePtrOp>(
          accObjectList, converter, semanticsContext, stmtCtx, copyoutOperands,
          mlir::acc::DataClause::acc_copyout, false, /*implicit=*/false,
          asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes);
    } else if (const auto *deleteClause =
                   std::get_if<Fortran::parser::AccClause::Delete>(&clause.u)) {
      genDataOperandOperations<mlir::acc::GetDevicePtrOp>(
          deleteClause->v, converter, semanticsContext, stmtCtx, deleteOperands,
          mlir::acc::DataClause::acc_delete, false, /*implicit=*/false,
          asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes);
    } else if (const auto *detachClause =
                   std::get_if<Fortran::parser::AccClause::Detach>(&clause.u)) {
      genDataOperandOperations<mlir::acc::GetDevicePtrOp>(
          detachClause->v, converter, semanticsContext, stmtCtx, detachOperands,
          mlir::acc::DataClause::acc_detach, false, /*implicit=*/false,
          asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes);
    } else if (std::get_if<Fortran::parser::AccClause::Finalize>(&clause.u)) {
      addFinalizeAttr = true;
    }
  }

  dataClauseOperands.append(copyoutOperands);
  dataClauseOperands.append(deleteOperands);
  dataClauseOperands.append(detachOperands);

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value, 14> operands;
  llvm::SmallVector<int32_t, 7> operandSegments;
  addOperand(operands, operandSegments, ifCond);
  addOperand(operands, operandSegments, async);
  addOperand(operands, operandSegments, waitDevnum);
  addOperands(operands, operandSegments, waitOperands);
  addOperands(operands, operandSegments, dataClauseOperands);

  mlir::acc::ExitDataOp exitDataOp = createSimpleOp<mlir::acc::ExitDataOp>(
      builder, currentLocation, operands, operandSegments);

  if (addAsyncAttr)
    exitDataOp.setAsyncAttr(builder.getUnitAttr());
  if (addWaitAttr)
    exitDataOp.setWaitAttr(builder.getUnitAttr());
  if (addFinalizeAttr)
    exitDataOp.setFinalizeAttr(builder.getUnitAttr());

  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::CopyoutOp>(
      builder, copyoutOperands, /*structured=*/false);
  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DeleteOp>(
      builder, deleteOperands, /*structured=*/false);
  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DetachOp>(
      builder, detachOperands, /*structured=*/false);
}

template <typename Op>
static void
genACCInitShutdownOp(Fortran::lower::AbstractConverter &converter,
                     mlir::Location currentLocation,
                     const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond, deviceNum;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  Fortran::lower::StatementContext stmtCtx;
  llvm::SmallVector<mlir::Attribute> deviceTypes;

  // Lower clauses values mapped to operands.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *deviceNumClause =
                   std::get_if<Fortran::parser::AccClause::DeviceNum>(
                       &clause.u)) {
      deviceNum = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(deviceNumClause->v), stmtCtx));
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      gatherDeviceTypeAttrs(builder, deviceTypeClause, deviceTypes);
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value, 6> operands;
  llvm::SmallVector<int32_t, 2> operandSegments;

  addOperand(operands, operandSegments, deviceNum);
  addOperand(operands, operandSegments, ifCond);

  Op op =
      createSimpleOp<Op>(builder, currentLocation, operands, operandSegments);
  if (!deviceTypes.empty())
    op.setDeviceTypesAttr(
        mlir::ArrayAttr::get(builder.getContext(), deviceTypes));
}

void genACCSetOp(Fortran::lower::AbstractConverter &converter,
                 mlir::Location currentLocation,
                 const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond, deviceNum, defaultAsync;
  llvm::SmallVector<mlir::Value> deviceTypeOperands;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  Fortran::lower::StatementContext stmtCtx;
  llvm::SmallVector<mlir::Attribute> deviceTypes;

  // Lower clauses values mapped to operands.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *defaultAsyncClause =
                   std::get_if<Fortran::parser::AccClause::DefaultAsync>(
                       &clause.u)) {
      defaultAsync = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(defaultAsyncClause->v), stmtCtx));
    } else if (const auto *deviceNumClause =
                   std::get_if<Fortran::parser::AccClause::DeviceNum>(
                       &clause.u)) {
      deviceNum = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(deviceNumClause->v), stmtCtx));
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      gatherDeviceTypeAttrs(builder, deviceTypeClause, deviceTypes);
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value> operands;
  llvm::SmallVector<int32_t, 3> operandSegments;
  addOperand(operands, operandSegments, defaultAsync);
  addOperand(operands, operandSegments, deviceNum);
  addOperand(operands, operandSegments, ifCond);

  auto op = createSimpleOp<mlir::acc::SetOp>(builder, currentLocation, operands,
                                             operandSegments);
  if (!deviceTypes.empty()) {
    assert(deviceTypes.size() == 1 && "expect only one value for acc.set");
    op.setDeviceTypeAttr(mlir::cast<mlir::acc::DeviceTypeAttr>(deviceTypes[0]));
  }
}

static inline mlir::ArrayAttr
getArrayAttr(fir::FirOpBuilder &b,
             llvm::SmallVector<mlir::Attribute> &attributes) {
  return attributes.empty() ? nullptr : b.getArrayAttr(attributes);
}

static inline mlir::ArrayAttr
getBoolArrayAttr(fir::FirOpBuilder &b, llvm::SmallVector<bool> &values) {
  return values.empty() ? nullptr : b.getBoolArrayAttr(values);
}

static inline mlir::DenseI32ArrayAttr
getDenseI32ArrayAttr(fir::FirOpBuilder &builder,
                     llvm::SmallVector<int32_t> &values) {
  return values.empty() ? nullptr : builder.getDenseI32ArrayAttr(values);
}

static void
genACCUpdateOp(Fortran::lower::AbstractConverter &converter,
               mlir::Location currentLocation,
               Fortran::semantics::SemanticsContext &semanticsContext,
               Fortran::lower::StatementContext &stmtCtx,
               const Fortran::parser::AccClauseList &accClauseList) {
  mlir::Value ifCond;
  llvm::SmallVector<mlir::Value> dataClauseOperands, updateHostOperands,
      waitOperands, deviceTypeOperands, asyncOperands;
  llvm::SmallVector<mlir::Attribute> asyncOperandsDeviceTypes,
      asyncOnlyDeviceTypes, waitOperandsDeviceTypes, waitOnlyDeviceTypes;
  llvm::SmallVector<bool> hasWaitDevnums;
  llvm::SmallVector<int32_t> waitOperandsSegments;

  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  // device_type attribute is set to `none` until a device_type clause is
  // encountered.
  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None));

  bool ifPresent = false;

  // Lower clauses values mapped to operands and array attributes.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.

  // Process the clauses that may have a specified device_type first.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *asyncClause =
            std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, asyncOperands,
                     asyncOperandsDeviceTypes, asyncOnlyDeviceTypes,
                     crtDeviceTypes, stmtCtx);
    } else if (const auto *waitClause =
                   std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) {
      genWaitClauseWithDeviceType(converter, waitClause, waitOperands,
                                  waitOperandsDeviceTypes, waitOnlyDeviceTypes,
                                  hasWaitDevnums, waitOperandsSegments,
                                  crtDeviceTypes, stmtCtx);
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      crtDeviceTypes.clear();
      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
    }
  }

  // Process the clauses independent of device_type.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *hostClause =
                   std::get_if<Fortran::parser::AccClause::Host>(&clause.u)) {
      genDataOperandOperations<mlir::acc::GetDevicePtrOp>(
          hostClause->v, converter, semanticsContext, stmtCtx,
          updateHostOperands, mlir::acc::DataClause::acc_update_host, false,
          /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (const auto *deviceClause =
                   std::get_if<Fortran::parser::AccClause::Device>(&clause.u)) {
      genDataOperandOperations<mlir::acc::UpdateDeviceOp>(
          deviceClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_update_device, false,
          /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes,
          asyncOnlyDeviceTypes);
    } else if (std::get_if<Fortran::parser::AccClause::IfPresent>(&clause.u)) {
      ifPresent = true;
    } else if (const auto *selfClause =
                   std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) {
      const std::optional<Fortran::parser::AccSelfClause> &accSelfClause =
          selfClause->v;
      const auto *accObjectList =
          std::get_if<Fortran::parser::AccObjectList>(&(*accSelfClause).u);
      assert(accObjectList && "expect AccObjectList");
      genDataOperandOperations<mlir::acc::GetDevicePtrOp>(
          *accObjectList, converter, semanticsContext, stmtCtx,
          updateHostOperands, mlir::acc::DataClause::acc_update_self, false,
          /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes,
          asyncOnlyDeviceTypes);
    }
  }

  dataClauseOperands.append(updateHostOperands);

  builder.create<mlir::acc::UpdateOp>(
      currentLocation, ifCond, asyncOperands,
      getArrayAttr(builder, asyncOperandsDeviceTypes),
      getArrayAttr(builder, asyncOnlyDeviceTypes), waitOperands,
      getDenseI32ArrayAttr(builder, waitOperandsSegments),
      getArrayAttr(builder, waitOperandsDeviceTypes),
      getBoolArrayAttr(builder, hasWaitDevnums),
      getArrayAttr(builder, waitOnlyDeviceTypes), dataClauseOperands,
      ifPresent);

  genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::UpdateHostOp>(
      builder, updateHostOperands, /*structured=*/false);
}

static void
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::semantics::SemanticsContext &semanticsContext,
       const Fortran::parser::OpenACCStandaloneConstruct &standaloneConstruct) {
  const auto &standaloneDirective =
      std::get<Fortran::parser::AccStandaloneDirective>(standaloneConstruct.t);
  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(standaloneConstruct.t);

  mlir::Location currentLocation =
      converter.genLocation(standaloneDirective.source);
  Fortran::lower::StatementContext stmtCtx;

  if (standaloneDirective.v == llvm::acc::Directive::ACCD_enter_data) {
    genACCEnterDataOp(converter, currentLocation, semanticsContext, stmtCtx,
                      accClauseList);
  } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_exit_data) {
    genACCExitDataOp(converter, currentLocation, semanticsContext, stmtCtx,
                     accClauseList);
  } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_init) {
    genACCInitShutdownOp<mlir::acc::InitOp>(converter, currentLocation,
                                            accClauseList);
  } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_shutdown) {
    genACCInitShutdownOp<mlir::acc::ShutdownOp>(converter, currentLocation,
                                                accClauseList);
  } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_set) {
    genACCSetOp(converter, currentLocation, accClauseList);
  } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_update) {
    genACCUpdateOp(converter, currentLocation, semanticsContext, stmtCtx,
                   accClauseList);
  }
}

static void genACC(Fortran::lower::AbstractConverter &converter,
                   const Fortran::parser::OpenACCWaitConstruct &waitConstruct) {

  const auto &waitArgument =
      std::get<std::optional<Fortran::parser::AccWaitArgument>>(
          waitConstruct.t);
  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(waitConstruct.t);

  mlir::Value ifCond, waitDevnum, async;
  llvm::SmallVector<mlir::Value> waitOperands;

  // Async clause have optional values but can be present with
  // no value as well. When there is no value, the op has an attribute to
  // represent the clause.
  bool addAsyncAttr = false;

  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
  mlir::Location currentLocation = converter.genLocation(waitConstruct.source);
  Fortran::lower::StatementContext stmtCtx;

  if (waitArgument) { // wait has a value.
    const Fortran::parser::AccWaitArgument &waitArg = *waitArgument;
    const auto &waitList =
        std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    for (const Fortran::parser::ScalarIntExpr &value : waitList) {
      mlir::Value v = fir::getBase(
          converter.genExprValue(*Fortran::semantics::GetExpr(value), stmtCtx));
      waitOperands.push_back(v);
    }

    const auto &waitDevnumValue =
        std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t);
    if (waitDevnumValue)
      waitDevnum = fir::getBase(converter.genExprValue(
          *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx));
  }

  // Lower clauses values mapped to operands.
  // Keep track of each group of operands separately as clauses can appear
  // more than once.
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    mlir::Location clauseLocation = converter.genLocation(clause.source);
    if (const auto *ifClause =
            std::get_if<Fortran::parser::AccClause::If>(&clause.u)) {
      genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx);
    } else if (const auto *asyncClause =
                   std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) {
      genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx);
    }
  }

  // Prepare the operand segment size attribute and the operands value range.
  llvm::SmallVector<mlir::Value> operands;
  llvm::SmallVector<int32_t> operandSegments;
  addOperands(operands, operandSegments, waitOperands);
  addOperand(operands, operandSegments, async);
  addOperand(operands, operandSegments, waitDevnum);
  addOperand(operands, operandSegments, ifCond);

  mlir::acc::WaitOp waitOp = createSimpleOp<mlir::acc::WaitOp>(
      firOpBuilder, currentLocation, operands, operandSegments);

  if (addAsyncAttr)
    waitOp.setAsyncAttr(firOpBuilder.getUnitAttr());
}

template <typename GlobalOp, typename EntryOp, typename DeclareOp,
          typename ExitOp>
static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder,
                                  fir::FirOpBuilder &builder,
                                  mlir::Location loc, fir::GlobalOp globalOp,
                                  mlir::acc::DataClause clause,
                                  const std::string &declareGlobalName,
                                  bool implicit, std::stringstream &asFortran) {
  GlobalOp declareGlobalOp =
      modBuilder.create<GlobalOp>(loc, declareGlobalName);
  builder.createBlock(&declareGlobalOp.getRegion(),
                      declareGlobalOp.getRegion().end(), {}, {});
  builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back());

  fir::AddrOfOp addrOp = builder.create<fir::AddrOfOp>(
      loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());
  addDeclareAttr(builder, addrOp, clause);

  llvm::SmallVector<mlir::Value> bounds;
  EntryOp entryOp = createDataEntryOp<EntryOp>(
      builder, loc, addrOp.getResTy(), asFortran, bounds,
      /*structured=*/false, implicit, clause, addrOp.getResTy().getType(),
      /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>)
    builder.create<DeclareOp>(
        loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
        mlir::ValueRange(entryOp.getAccPtr()));
  else
    builder.create<DeclareOp>(loc, mlir::Value{},
                              mlir::ValueRange(entryOp.getAccPtr()));
  if constexpr (std::is_same_v<GlobalOp, mlir::acc::GlobalDestructorOp>) {
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
                           entryOp.getBounds(), entryOp.getAsyncOperands(),
                           entryOp.getAsyncOperandsDeviceTypeAttr(),
                           entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
                           /*structured=*/false, /*implicit=*/false,
                           builder.getStringAttr(*entryOp.getName()));
  }
  builder.create<mlir::acc::TerminatorOp>(loc);
  modBuilder.setInsertionPointAfter(declareGlobalOp);
}

template <typename EntryOp>
static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
                                   fir::FirOpBuilder &builder,
                                   mlir::Location loc, fir::GlobalOp &globalOp,
                                   mlir::acc::DataClause clause) {
  std::stringstream registerFuncName;
  registerFuncName << globalOp.getSymName().str()
                   << Fortran::lower::declarePostAllocSuffix.str();
  auto registerFuncOp =
      createDeclareFunc(modBuilder, builder, loc, registerFuncName.str());

  fir::AddrOfOp addrOp = builder.create<fir::AddrOfOp>(
      loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());

  std::stringstream asFortran;
  asFortran << Fortran::lower::mangle::demangleName(globalOp.getSymName());
  std::stringstream asFortranDesc;
  asFortranDesc << asFortran.str() << accFirDescriptorPostfix.str();
  llvm::SmallVector<mlir::Value> bounds;

  // Updating descriptor must occur before the mapping of the data so that
  // attached data pointer is not overwritten.
  mlir::acc::UpdateDeviceOp updateDeviceOp =
      createDataEntryOp<mlir::acc::UpdateDeviceOp>(
          builder, loc, addrOp, asFortranDesc, bounds,
          /*structured=*/false, /*implicit=*/true,
          mlir::acc::DataClause::acc_update_device, addrOp.getType(),
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
  llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
  createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);

  auto loadOp = builder.create<fir::LoadOp>(loc, addrOp.getResult());
  fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, loadOp);
  addDeclareAttr(builder, boxAddrOp.getOperation(), clause);
  EntryOp entryOp = createDataEntryOp<EntryOp>(
      builder, loc, boxAddrOp.getResult(), asFortran, bounds,
      /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType(),
      /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  builder.create<mlir::acc::DeclareEnterOp>(
      loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()),
      mlir::ValueRange(entryOp.getAccPtr()));

  modBuilder.setInsertionPointAfter(registerFuncOp);
}

/// Action to be performed on deallocation are split in two distinct functions.
/// - Pre deallocation function includes all the action to be performed before
///   the actual deallocation is done on the host side.
/// - Post deallocation function includes update to the descriptor.
template <typename ExitOp>
static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
                                     fir::FirOpBuilder &builder,
                                     mlir::Location loc,
                                     fir::GlobalOp &globalOp,
                                     mlir::acc::DataClause clause) {

  // Generate the pre dealloc function.
  std::stringstream preDeallocFuncName;
  preDeallocFuncName << globalOp.getSymName().str()
                     << Fortran::lower::declarePreDeallocSuffix.str();
  auto preDeallocOp =
      createDeclareFunc(modBuilder, builder, loc, preDeallocFuncName.str());
  fir::AddrOfOp addrOp = builder.create<fir::AddrOfOp>(
      loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());
  auto loadOp = builder.create<fir::LoadOp>(loc, addrOp.getResult());
  fir::BoxAddrOp boxAddrOp = builder.create<fir::BoxAddrOp>(loc, loadOp);
  addDeclareAttr(builder, boxAddrOp.getOperation(), clause);

  std::stringstream asFortran;
  asFortran << Fortran::lower::mangle::demangleName(globalOp.getSymName());
  llvm::SmallVector<mlir::Value> bounds;
  mlir::acc::GetDevicePtrOp entryOp =
      createDataEntryOp<mlir::acc::GetDevicePtrOp>(
          builder, loc, boxAddrOp.getResult(), asFortran, bounds,
          /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType(),
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});

  builder.create<mlir::acc::DeclareExitOp>(
      loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr()));

  if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
                std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
                           entryOp.getVarPtr(), entryOp.getBounds(),
                           entryOp.getAsyncOperands(),
                           entryOp.getAsyncOperandsDeviceTypeAttr(),
                           entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
                           /*structured=*/false, /*implicit=*/false,
                           builder.getStringAttr(*entryOp.getName()));
  else
    builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(),
                           entryOp.getBounds(), entryOp.getAsyncOperands(),
                           entryOp.getAsyncOperandsDeviceTypeAttr(),
                           entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
                           /*structured=*/false, /*implicit=*/false,
                           builder.getStringAttr(*entryOp.getName()));

  // Generate the post dealloc function.
  modBuilder.setInsertionPointAfter(preDeallocOp);
  std::stringstream postDeallocFuncName;
  postDeallocFuncName << globalOp.getSymName().str()
                      << Fortran::lower::declarePostDeallocSuffix.str();
  auto postDeallocOp =
      createDeclareFunc(modBuilder, builder, loc, postDeallocFuncName.str());

  addrOp = builder.create<fir::AddrOfOp>(
      loc, fir::ReferenceType::get(globalOp.getType()), globalOp.getSymbol());
  asFortran << accFirDescriptorPostfix.str();
  mlir::acc::UpdateDeviceOp updateDeviceOp =
      createDataEntryOp<mlir::acc::UpdateDeviceOp>(
          builder, loc, addrOp, asFortran, bounds,
          /*structured=*/false, /*implicit=*/true,
          mlir::acc::DataClause::acc_update_device, addrOp.getType(),
          /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
  llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
  llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
  createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
  modBuilder.setInsertionPointAfter(postDeallocOp);
}

template <typename EntryOp, typename ExitOp>
static void genGlobalCtors(Fortran::lower::AbstractConverter &converter,
                           mlir::OpBuilder &modBuilder,
                           const Fortran::parser::AccObjectList &accObjectList,
                           mlir::acc::DataClause clause) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  for (const auto &accObject : accObjectList.v) {
    mlir::Location operandLocation = genOperandLocation(converter, accObject);
    Fortran::common::visit(
        Fortran::common::visitors{
            [&](const Fortran::parser::Designator &designator) {
              if (const auto *name =
                      Fortran::semantics::getDesignatorNameIfDataRef(
                          designator)) {
                std::string globalName = converter.mangleName(*name->symbol);
                fir::GlobalOp globalOp = builder.getNamedGlobal(globalName);
                std::stringstream declareGlobalCtorName;
                declareGlobalCtorName << globalName << "_acc_ctor";
                std::stringstream declareGlobalDtorName;
                declareGlobalDtorName << globalName << "_acc_dtor";
                std::stringstream asFortran;
                asFortran << name->symbol->name().ToString();

                if (builder.getModule()
                        .lookupSymbol<mlir::acc::GlobalConstructorOp>(
                            declareGlobalCtorName.str()))
                  return;

                if (!globalOp) {
                  if (Fortran::semantics::FindEquivalenceSet(*name->symbol)) {
                    for (Fortran::semantics::EquivalenceObject eqObj :
                         *Fortran::semantics::FindEquivalenceSet(
                             *name->symbol)) {
                      std::string eqName = converter.mangleName(eqObj.symbol);
                      globalOp = builder.getNamedGlobal(eqName);
                      if (globalOp)
                        break;
                    }

                    if (!globalOp)
                      llvm::report_fatal_error(
                          "could not retrieve global symbol");
                  } else {
                    llvm::report_fatal_error(
                        "could not retrieve global symbol");
                  }
                }

                addDeclareAttr(builder, globalOp.getOperation(), clause);
                auto crtPos = builder.saveInsertionPoint();
                modBuilder.setInsertionPointAfter(globalOp);
                if (mlir::isa<fir::BaseBoxType>(
                        fir::unwrapRefType(globalOp.getType()))) {
                  createDeclareGlobalOp<mlir::acc::GlobalConstructorOp,
                                        mlir::acc::CopyinOp,
                                        mlir::acc::DeclareEnterOp, ExitOp>(
                      modBuilder, builder, operandLocation, globalOp, clause,
                      declareGlobalCtorName.str(), /*implicit=*/true,
                      asFortran);
                  createDeclareAllocFunc<EntryOp>(
                      modBuilder, builder, operandLocation, globalOp, clause);
                  if constexpr (!std::is_same_v<EntryOp, ExitOp>)
                    createDeclareDeallocFunc<ExitOp>(
                        modBuilder, builder, operandLocation, globalOp, clause);
                } else {
                  createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, EntryOp,
                                        mlir::acc::DeclareEnterOp, ExitOp>(
                      modBuilder, builder, operandLocation, globalOp, clause,
                      declareGlobalCtorName.str(), /*implicit=*/false,
                      asFortran);
                }
                if constexpr (!std::is_same_v<EntryOp, ExitOp>) {
                  createDeclareGlobalOp<mlir::acc::GlobalDestructorOp,
                                        mlir::acc::GetDevicePtrOp,
                                        mlir::acc::DeclareExitOp, ExitOp>(
                      modBuilder, builder, operandLocation, globalOp, clause,
                      declareGlobalDtorName.str(), /*implicit=*/false,
                      asFortran);
                }
                builder.restoreInsertionPoint(crtPos);
              }
            },
            [&](const Fortran::parser::Name &name) {
              TODO(operandLocation, "OpenACC Global Ctor from parser::Name");
            }},
        accObject.u);
  }
}

template <typename Clause, typename EntryOp, typename ExitOp>
static void
genGlobalCtorsWithModifier(Fortran::lower::AbstractConverter &converter,
                           mlir::OpBuilder &modBuilder, const Clause *x,
                           Fortran::parser::AccDataModifier::Modifier mod,
                           const mlir::acc::DataClause clause,
                           const mlir::acc::DataClause clauseWithModifier) {
  const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v;
  const auto &accObjectList =
      std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
  const auto &modifier =
      std::get<std::optional<Fortran::parser::AccDataModifier>>(
          listWithModifier.t);
  mlir::acc::DataClause dataClause =
      (modifier && (*modifier).v == mod) ? clauseWithModifier : clause;
  genGlobalCtors<EntryOp, ExitOp>(converter, modBuilder, accObjectList,
                                  dataClause);
}

static void
genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
                     Fortran::semantics::SemanticsContext &semanticsContext,
                     Fortran::lower::StatementContext &openAccCtx,
                     mlir::Location loc,
                     const Fortran::parser::AccClauseList &accClauseList) {
  llvm::SmallVector<mlir::Value> dataClauseOperands, copyEntryOperands,
      createEntryOperands, copyoutEntryOperands, deviceResidentEntryOperands;
  Fortran::lower::StatementContext stmtCtx;
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();

  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *copyClause =
            std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDeclareDataOperandOperations<mlir::acc::CopyinOp,
                                      mlir::acc::CopyoutOp>(
          copyClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_copy,
          /*structured=*/true, /*implicit=*/false);
      copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                               dataClauseOperands.end());
    } else if (const auto *createClause =
                   std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          createClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      auto crtDataStart = dataClauseOperands.size();
      genDeclareDataOperandOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
          accObjectList, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_create,
          /*structured=*/true, /*implicit=*/false);
      createEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                 dataClauseOperands.end());
    } else if (const auto *presentClause =
                   std::get_if<Fortran::parser::AccClause::Present>(
                       &clause.u)) {
      genDeclareDataOperandOperations<mlir::acc::PresentOp,
                                      mlir::acc::PresentOp>(
          presentClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_present,
          /*structured=*/true, /*implicit=*/false);
    } else if (const auto *copyinClause =
                   std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) {
      genDeclareDataOperandOperationsWithModifier<mlir::acc::CopyinOp,
                                                  mlir::acc::DeleteOp>(
          copyinClause, converter, semanticsContext, stmtCtx,
          Fortran::parser::AccDataModifier::Modifier::ReadOnly,
          dataClauseOperands, mlir::acc::DataClause::acc_copyin,
          mlir::acc::DataClause::acc_copyin_readonly);
    } else if (const auto *copyoutClause =
                   std::get_if<Fortran::parser::AccClause::Copyout>(
                       &clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          copyoutClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      auto crtDataStart = dataClauseOperands.size();
      genDeclareDataOperandOperations<mlir::acc::CreateOp,
                                      mlir::acc::CopyoutOp>(
          accObjectList, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_copyout,
          /*structured=*/true, /*implicit=*/false);
      copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
                                  dataClauseOperands.end());
    } else if (const auto *devicePtrClause =
                   std::get_if<Fortran::parser::AccClause::Deviceptr>(
                       &clause.u)) {
      genDeclareDataOperandOperations<mlir::acc::DevicePtrOp,
                                      mlir::acc::DevicePtrOp>(
          devicePtrClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_deviceptr,
          /*structured=*/true, /*implicit=*/false);
    } else if (const auto *linkClause =
                   std::get_if<Fortran::parser::AccClause::Link>(&clause.u)) {
      genDeclareDataOperandOperations<mlir::acc::DeclareLinkOp,
                                      mlir::acc::DeclareLinkOp>(
          linkClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands, mlir::acc::DataClause::acc_declare_link,
          /*structured=*/true, /*implicit=*/false);
    } else if (const auto *deviceResidentClause =
                   std::get_if<Fortran::parser::AccClause::DeviceResident>(
                       &clause.u)) {
      auto crtDataStart = dataClauseOperands.size();
      genDeclareDataOperandOperations<mlir::acc::DeclareDeviceResidentOp,
                                      mlir::acc::DeleteOp>(
          deviceResidentClause->v, converter, semanticsContext, stmtCtx,
          dataClauseOperands,
          mlir::acc::DataClause::acc_declare_device_resident,
          /*structured=*/true, /*implicit=*/false);
      deviceResidentEntryOperands.append(
          dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end());
    } else {
      mlir::Location clauseLocation = converter.genLocation(clause.source);
      TODO(clauseLocation, "clause on declare directive");
    }
  }

  mlir::func::FuncOp funcOp = builder.getFunction();
  auto ops = funcOp.getOps<mlir::acc::DeclareEnterOp>();
  mlir::Value declareToken;
  if (ops.empty()) {
    declareToken = builder.create<mlir::acc::DeclareEnterOp>(
        loc, mlir::acc::DeclareTokenType::get(builder.getContext()),
        dataClauseOperands);
  } else {
    auto declareOp = *ops.begin();
    auto newDeclareOp = builder.create<mlir::acc::DeclareEnterOp>(
        loc, mlir::acc::DeclareTokenType::get(builder.getContext()),
        declareOp.getDataClauseOperands());
    newDeclareOp.getDataClauseOperandsMutable().append(dataClauseOperands);
    declareToken = newDeclareOp.getToken();
    declareOp.erase();
  }

  openAccCtx.attachCleanup([&builder, loc, createEntryOperands,
                            copyEntryOperands, copyoutEntryOperands,
                            deviceResidentEntryOperands, declareToken]() {
    llvm::SmallVector<mlir::Value> operands;
    operands.append(createEntryOperands);
    operands.append(deviceResidentEntryOperands);
    operands.append(copyEntryOperands);
    operands.append(copyoutEntryOperands);

    mlir::func::FuncOp funcOp = builder.getFunction();
    auto ops = funcOp.getOps<mlir::acc::DeclareExitOp>();
    if (ops.empty()) {
      builder.create<mlir::acc::DeclareExitOp>(loc, declareToken, operands);
    } else {
      auto declareOp = *ops.begin();
      declareOp.getDataClauseOperandsMutable().append(operands);
    }

    genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
        builder, createEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::DeclareDeviceResidentOp,
                          mlir::acc::DeleteOp>(
        builder, deviceResidentEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
        builder, copyEntryOperands, /*structured=*/true);
    genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
        builder, copyoutEntryOperands, /*structured=*/true);
  });
}

static void
genDeclareInModule(Fortran::lower::AbstractConverter &converter,
                   mlir::ModuleOp &moduleOp,
                   const Fortran::parser::AccClauseList &accClauseList) {
  mlir::OpBuilder modBuilder(moduleOp.getBodyRegion());
  for (const Fortran::parser::AccClause &clause : accClauseList.v) {
    if (const auto *createClause =
            std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) {
      const Fortran::parser::AccObjectListWithModifier &listWithModifier =
          createClause->v;
      const auto &accObjectList =
          std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
      genGlobalCtors<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
          converter, modBuilder, accObjectList,
          mlir::acc::DataClause::acc_create);
    } else if (const auto *copyinClause =
                   std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) {
      genGlobalCtorsWithModifier<Fortran::parser::AccClause::Copyin,
                                 mlir::acc::CopyinOp, mlir::acc::CopyinOp>(
          converter, modBuilder, copyinClause,
          Fortran::parser::AccDataModifier::Modifier::ReadOnly,
          mlir::acc::DataClause::acc_copyin,
          mlir::acc::DataClause::acc_copyin_readonly);
    } else if (const auto *deviceResidentClause =
                   std::get_if<Fortran::parser::AccClause::DeviceResident>(
                       &clause.u)) {
      genGlobalCtors<mlir::acc::DeclareDeviceResidentOp, mlir::acc::DeleteOp>(
          converter, modBuilder, deviceResidentClause->v,
          mlir::acc::DataClause::acc_declare_device_resident);
    } else if (const auto *linkClause =
                   std::get_if<Fortran::parser::AccClause::Link>(&clause.u)) {
      genGlobalCtors<mlir::acc::DeclareLinkOp, mlir::acc::DeclareLinkOp>(
          converter, modBuilder, linkClause->v,
          mlir::acc::DataClause::acc_declare_link);
    } else {
      llvm::report_fatal_error("unsupported clause on DECLARE directive");
    }
  }
}

static void genACC(Fortran::lower::AbstractConverter &converter,
                   Fortran::semantics::SemanticsContext &semanticsContext,
                   Fortran::lower::StatementContext &openAccCtx,
                   const Fortran::parser::OpenACCStandaloneDeclarativeConstruct
                       &declareConstruct) {

  const auto &declarativeDir =
      std::get<Fortran::parser::AccDeclarativeDirective>(declareConstruct.t);
  mlir::Location directiveLocation =
      converter.genLocation(declarativeDir.source);
  const auto &accClauseList =
      std::get<Fortran::parser::AccClauseList>(declareConstruct.t);

  if (declarativeDir.v == llvm::acc::Directive::ACCD_declare) {
    fir::FirOpBuilder &builder = converter.getFirOpBuilder();
    auto moduleOp =
        builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>();
    auto funcOp =
        builder.getBlock()->getParent()->getParentOfType<mlir::func::FuncOp>();
    if (funcOp)
      genDeclareInFunction(converter, semanticsContext, openAccCtx,
                           directiveLocation, accClauseList);
    else if (moduleOp)
      genDeclareInModule(converter, moduleOp, accClauseList);
    return;
  }
  llvm_unreachable("unsupported declarative directive");
}

static bool hasDeviceType(llvm::SmallVector<mlir::Attribute> &arrayAttr,
                          mlir::acc::DeviceType deviceType) {
  for (auto attr : arrayAttr) {
    auto deviceTypeAttr = mlir::dyn_cast<mlir::acc::DeviceTypeAttr>(attr);
    if (deviceTypeAttr.getValue() == deviceType)
      return true;
  }
  return false;
}

template <typename RetTy, typename AttrTy>
static std::optional<RetTy>
getAttributeValueByDeviceType(llvm::SmallVector<mlir::Attribute> &attributes,
                              llvm::SmallVector<mlir::Attribute> &deviceTypes,
                              mlir::acc::DeviceType deviceType) {
  assert(attributes.size() == deviceTypes.size() &&
         "expect same number of attributes");
  for (auto it : llvm::enumerate(deviceTypes)) {
    auto deviceTypeAttr = mlir::dyn_cast<mlir::acc::DeviceTypeAttr>(it.value());
    if (deviceTypeAttr.getValue() == deviceType) {
      if constexpr (std::is_same_v<mlir::StringAttr, AttrTy>) {
        auto strAttr = mlir::dyn_cast<AttrTy>(attributes[it.index()]);
        return strAttr.getValue();
      } else if constexpr (std::is_same_v<mlir::IntegerAttr, AttrTy>) {
        auto intAttr =
            mlir::dyn_cast<mlir::IntegerAttr>(attributes[it.index()]);
        return intAttr.getInt();
      }
    }
  }
  return std::nullopt;
}

static bool compareDeviceTypeInfo(
    mlir::acc::RoutineOp op,
    llvm::SmallVector<mlir::Attribute> &bindNameArrayAttr,
    llvm::SmallVector<mlir::Attribute> &bindNameDeviceTypeArrayAttr,
    llvm::SmallVector<mlir::Attribute> &gangArrayAttr,
    llvm::SmallVector<mlir::Attribute> &gangDimArrayAttr,
    llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypeArrayAttr,
    llvm::SmallVector<mlir::Attribute> &seqArrayAttr,
    llvm::SmallVector<mlir::Attribute> &workerArrayAttr,
    llvm::SmallVector<mlir::Attribute> &vectorArrayAttr) {
  for (uint32_t dtypeInt = 0;
       dtypeInt != mlir::acc::getMaxEnumValForDeviceType(); ++dtypeInt) {
    auto dtype = static_cast<mlir::acc::DeviceType>(dtypeInt);
    if (op.getBindNameValue(dtype) !=
        getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>(
            bindNameArrayAttr, bindNameDeviceTypeArrayAttr, dtype))
      return false;
    if (op.hasGang(dtype) != hasDeviceType(gangArrayAttr, dtype))
      return false;
    if (op.getGangDimValue(dtype) !=
        getAttributeValueByDeviceType<int64_t, mlir::IntegerAttr>(
            gangDimArrayAttr, gangDimDeviceTypeArrayAttr, dtype))
      return false;
    if (op.hasSeq(dtype) != hasDeviceType(seqArrayAttr, dtype))
      return false;
    if (op.hasWorker(dtype) != hasDeviceType(workerArrayAttr, dtype))
      return false;
    if (op.hasVector(dtype) != hasDeviceType(vectorArrayAttr, dtype))
      return false;
  }
  return true;
}

static void attachRoutineInfo(mlir::func::FuncOp func,
                              mlir::SymbolRefAttr routineAttr) {
  llvm::SmallVector<mlir::SymbolRefAttr> routines;
  if (func.getOperation()->hasAttr(mlir::acc::getRoutineInfoAttrName())) {
    auto routineInfo =
        func.getOperation()->getAttrOfType<mlir::acc::RoutineInfoAttr>(
            mlir::acc::getRoutineInfoAttrName());
    routines.append(routineInfo.getAccRoutines().begin(),
                    routineInfo.getAccRoutines().end());
  }
  routines.push_back(routineAttr);
  func.getOperation()->setAttr(
      mlir::acc::getRoutineInfoAttrName(),
      mlir::acc::RoutineInfoAttr::get(func.getContext(), routines));
}

void Fortran::lower::genOpenACCRoutineConstruct(
    Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext, mlir::ModuleOp &mod,
    const Fortran::parser::OpenACCRoutineConstruct &routineConstruct,
    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  mlir::Location loc = converter.genLocation(routineConstruct.source);
  std::optional<Fortran::parser::Name> name =
      std::get<std::optional<Fortran::parser::Name>>(routineConstruct.t);
  const auto &clauses =
      std::get<Fortran::parser::AccClauseList>(routineConstruct.t);
  mlir::func::FuncOp funcOp;
  std::string funcName;
  if (name) {
    funcName = converter.mangleName(*name->symbol);
    funcOp =
        builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName);
  } else {
    Fortran::semantics::Scope &scope =
        semanticsContext.FindScope(routineConstruct.source);
    const Fortran::semantics::Scope &progUnit{GetProgramUnitContaining(scope)};
    const auto *subpDetails{
        progUnit.symbol()
            ? progUnit.symbol()
                  ->detailsIf<Fortran::semantics::SubprogramDetails>()
            : nullptr};
    if (subpDetails && subpDetails->isInterface()) {
      funcName = converter.mangleName(*progUnit.symbol());
      funcOp =
          builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName);
    } else {
      funcOp = builder.getFunction();
      funcName = funcOp.getName();
    }
  }
  bool hasNohost = false;

  llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes,
      workerDeviceTypes, bindNameDeviceTypes, bindNames, gangDeviceTypes,
      gangDimDeviceTypes, gangDimValues;

  // device_type attribute is set to `none` until a device_type clause is
  // encountered.
  llvm::SmallVector<mlir::Attribute> crtDeviceTypes;
  crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get(
      builder.getContext(), mlir::acc::DeviceType::None));

  for (const Fortran::parser::AccClause &clause : clauses.v) {
    if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        seqDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (const auto *gangClause =
                   std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
      if (gangClause->v) {
        const Fortran::parser::AccGangArgList &x = *gangClause->v;
        for (const Fortran::parser::AccGangArg &gangArg : x.v) {
          if (const auto *dim =
                  std::get_if<Fortran::parser::AccGangArg::Dim>(&gangArg.u)) {
            const std::optional<int64_t> dimValue = Fortran::evaluate::ToInt64(
                *Fortran::semantics::GetExpr(dim->v));
            if (!dimValue)
              mlir::emitError(loc,
                              "dim value must be a constant positive integer");
            mlir::Attribute gangDimAttr =
                builder.getIntegerAttr(builder.getI64Type(), *dimValue);
            for (auto crtDeviceTypeAttr : crtDeviceTypes) {
              gangDimValues.push_back(gangDimAttr);
              gangDimDeviceTypes.push_back(crtDeviceTypeAttr);
            }
          }
        }
      } else {
        for (auto crtDeviceTypeAttr : crtDeviceTypes)
          gangDeviceTypes.push_back(crtDeviceTypeAttr);
      }
    } else if (std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        vectorDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
      for (auto crtDeviceTypeAttr : crtDeviceTypes)
        workerDeviceTypes.push_back(crtDeviceTypeAttr);
    } else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
      hasNohost = true;
    } else if (const auto *bindClause =
                   std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
      if (const auto *name =
              std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
        mlir::Attribute bindNameAttr =
            builder.getStringAttr(converter.mangleName(*name->symbol));
        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
          bindNames.push_back(bindNameAttr);
          bindNameDeviceTypes.push_back(crtDeviceTypeAttr);
        }
      } else if (const auto charExpr =
                     std::get_if<Fortran::parser::ScalarDefaultCharExpr>(
                         &bindClause->v.u)) {
        const std::optional<std::string> name =
            Fortran::semantics::GetConstExpr<std::string>(semanticsContext,
                                                          *charExpr);
        if (!name)
          mlir::emitError(loc, "Could not retrieve the bind name");

        mlir::Attribute bindNameAttr = builder.getStringAttr(*name);
        for (auto crtDeviceTypeAttr : crtDeviceTypes) {
          bindNames.push_back(bindNameAttr);
          bindNameDeviceTypes.push_back(crtDeviceTypeAttr);
        }
      }
    } else if (const auto *deviceTypeClause =
                   std::get_if<Fortran::parser::AccClause::DeviceType>(
                       &clause.u)) {
      crtDeviceTypes.clear();
      gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes);
    }
  }

  mlir::OpBuilder modBuilder(mod.getBodyRegion());
  std::stringstream routineOpName;
  routineOpName << accRoutinePrefix.str() << routineCounter++;

  for (auto routineOp : mod.getOps<mlir::acc::RoutineOp>()) {
    if (routineOp.getFuncName().str().compare(funcName) == 0) {
      // If the routine is already specified with the same clauses, just skip
      // the operation creation.
      if (compareDeviceTypeInfo(routineOp, bindNames, bindNameDeviceTypes,
                                gangDeviceTypes, gangDimValues,
                                gangDimDeviceTypes, seqDeviceTypes,
                                workerDeviceTypes, vectorDeviceTypes) &&
          routineOp.getNohost() == hasNohost)
        return;
      mlir::emitError(loc, "Routine already specified with different clauses");
    }
  }

  modBuilder.create<mlir::acc::RoutineOp>(
      loc, routineOpName.str(), funcName,
      bindNames.empty() ? nullptr : builder.getArrayAttr(bindNames),
      bindNameDeviceTypes.empty() ? nullptr
                                  : builder.getArrayAttr(bindNameDeviceTypes),
      workerDeviceTypes.empty() ? nullptr
                                : builder.getArrayAttr(workerDeviceTypes),
      vectorDeviceTypes.empty() ? nullptr
                                : builder.getArrayAttr(vectorDeviceTypes),
      seqDeviceTypes.empty() ? nullptr : builder.getArrayAttr(seqDeviceTypes),
      hasNohost, /*implicit=*/false,
      gangDeviceTypes.empty() ? nullptr : builder.getArrayAttr(gangDeviceTypes),
      gangDimValues.empty() ? nullptr : builder.getArrayAttr(gangDimValues),
      gangDimDeviceTypes.empty() ? nullptr
                                 : builder.getArrayAttr(gangDimDeviceTypes));

  if (funcOp)
    attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpName.str()));
  else
    // FuncOp is not lowered yet. Keep the information so the routine info
    // can be attached later to the funcOp.
    accRoutineInfos.push_back(std::make_pair(
        funcName, builder.getSymbolRefAttr(routineOpName.str())));
}

void Fortran::lower::finalizeOpenACCRoutineAttachment(
    mlir::ModuleOp &mod,
    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {
  for (auto &mapping : accRoutineInfos) {
    mlir::func::FuncOp funcOp =
        mod.lookupSymbol<mlir::func::FuncOp>(mapping.first);
    if (!funcOp)
      mlir::emitWarning(mod.getLoc(),
                        llvm::Twine("function '") + llvm::Twine(mapping.first) +
                            llvm::Twine("' in acc routine directive is not "
                                        "found in this translation unit."));
    else
      attachRoutineInfo(funcOp, mapping.second);
  }
  accRoutineInfos.clear();
}

static void
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::lower::pft::Evaluation &eval,
       const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) {

  mlir::Location loc = converter.genLocation(atomicConstruct.source);
  Fortran::common::visit(
      Fortran::common::visitors{
          [&](const Fortran::parser::AccAtomicRead &atomicRead) {
            Fortran::lower::genOmpAccAtomicRead<Fortran::parser::AccAtomicRead,
                                                void>(converter, atomicRead,
                                                      loc);
          },
          [&](const Fortran::parser::AccAtomicWrite &atomicWrite) {
            Fortran::lower::genOmpAccAtomicWrite<
                Fortran::parser::AccAtomicWrite, void>(converter, atomicWrite,
                                                       loc);
          },
          [&](const Fortran::parser::AccAtomicUpdate &atomicUpdate) {
            Fortran::lower::genOmpAccAtomicUpdate<
                Fortran::parser::AccAtomicUpdate, void>(converter, atomicUpdate,
                                                        loc);
          },
          [&](const Fortran::parser::AccAtomicCapture &atomicCapture) {
            Fortran::lower::genOmpAccAtomicCapture<
                Fortran::parser::AccAtomicCapture, void>(converter,
                                                         atomicCapture, loc);
          },
      },
      atomicConstruct.u);
}

static void
genACC(Fortran::lower::AbstractConverter &converter,
       Fortran::semantics::SemanticsContext &semanticsContext,
       const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) {
  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
  auto loopOp = builder.getRegion().getParentOfType<mlir::acc::LoopOp>();
  auto crtPos = builder.saveInsertionPoint();
  if (loopOp) {
    builder.setInsertionPoint(loopOp);
    Fortran::lower::StatementContext stmtCtx;
    llvm::SmallVector<mlir::Value> cacheOperands;
    const Fortran::parser::AccObjectListWithModifier &listWithModifier =
        std::get<Fortran::parser::AccObjectListWithModifier>(cacheConstruct.t);
    const auto &accObjectList =
        std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
    const auto &modifier =
        std::get<std::optional<Fortran::parser::AccDataModifier>>(
            listWithModifier.t);

    mlir::acc::DataClause dataClause = mlir::acc::DataClause::acc_cache;
    if (modifier &&
        (*modifier).v == Fortran::parser::AccDataModifier::Modifier::ReadOnly)
      dataClause = mlir::acc::DataClause::acc_cache_readonly;
    genDataOperandOperations<mlir::acc::CacheOp>(
        accObjectList, converter, semanticsContext, stmtCtx, cacheOperands,
        dataClause,
        /*structured=*/true, /*implicit=*/false,
        /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
        /*setDeclareAttr*/ false);
    loopOp.getCacheOperandsMutable().append(cacheOperands);
  } else {
    llvm::report_fatal_error(
        "could not find loop to attach OpenACC cache information.");
  }
  builder.restoreInsertionPoint(crtPos);
}

mlir::Value Fortran::lower::genOpenACCConstruct(
    Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::pft::Evaluation &eval,
    const Fortran::parser::OpenACCConstruct &accConstruct) {

  mlir::Value exitCond;
  Fortran::common::visit(
      common::visitors{
          [&](const Fortran::parser::OpenACCBlockConstruct &blockConstruct) {
            genACC(converter, semanticsContext, eval, blockConstruct);
          },
          [&](const Fortran::parser::OpenACCCombinedConstruct
                  &combinedConstruct) {
            genACC(converter, semanticsContext, eval, combinedConstruct);
          },
          [&](const Fortran::parser::OpenACCLoopConstruct &loopConstruct) {
            exitCond = genACC(converter, semanticsContext, eval, loopConstruct);
          },
          [&](const Fortran::parser::OpenACCStandaloneConstruct
                  &standaloneConstruct) {
            genACC(converter, semanticsContext, standaloneConstruct);
          },
          [&](const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) {
            genACC(converter, semanticsContext, cacheConstruct);
          },
          [&](const Fortran::parser::OpenACCWaitConstruct &waitConstruct) {
            genACC(converter, waitConstruct);
          },
          [&](const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) {
            genACC(converter, eval, atomicConstruct);
          },
          [&](const Fortran::parser::OpenACCEndConstruct &) {
            // No op
          },
      },
      accConstruct.u);
  return exitCond;
}

void Fortran::lower::genOpenACCDeclarativeConstruct(
    Fortran::lower::AbstractConverter &converter,
    Fortran::semantics::SemanticsContext &semanticsContext,
    Fortran::lower::StatementContext &openAccCtx,
    const Fortran::parser::OpenACCDeclarativeConstruct &accDeclConstruct,
    Fortran::lower::AccRoutineInfoMappingList &accRoutineInfos) {

  Fortran::common::visit(
      common::visitors{
          [&](const Fortran::parser::OpenACCStandaloneDeclarativeConstruct
                  &standaloneDeclarativeConstruct) {
            genACC(converter, semanticsContext, openAccCtx,
                   standaloneDeclarativeConstruct);
          },
          [&](const Fortran::parser::OpenACCRoutineConstruct
                  &routineConstruct) {
            fir::FirOpBuilder &builder = converter.getFirOpBuilder();
            mlir::ModuleOp mod = builder.getModule();
            Fortran::lower::genOpenACCRoutineConstruct(
                converter, semanticsContext, mod, routineConstruct,
                accRoutineInfos);
          },
      },
      accDeclConstruct.u);
}

void Fortran::lower::attachDeclarePostAllocAction(
    AbstractConverter &converter, fir::FirOpBuilder &builder,
    const Fortran::semantics::Symbol &sym) {
  std::stringstream fctName;
  fctName << converter.mangleName(sym) << declarePostAllocSuffix.str();
  mlir::Operation *op = &builder.getInsertionBlock()->back();

  if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) {
    assert(resOp.getOperands().size() == 0 &&
           "expect only fir.result op with no operand");
    op = op->getPrevNode();
  }
  assert(op && "expect operation to attach the post allocation action");

  if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) {
    auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>(
        mlir::acc::getDeclareActionAttrName());
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(), attr.getPreAlloc(),
                    /*postAlloc=*/builder.getSymbolRefAttr(fctName.str()),
                    attr.getPreDealloc(), attr.getPostDealloc()));
  } else {
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(),
                    /*preAlloc=*/{},
                    /*postAlloc=*/builder.getSymbolRefAttr(fctName.str()),
                    /*preDealloc=*/{}, /*postDealloc=*/{}));
  }
}

void Fortran::lower::attachDeclarePreDeallocAction(
    AbstractConverter &converter, fir::FirOpBuilder &builder,
    mlir::Value beginOpValue, const Fortran::semantics::Symbol &sym) {
  if (!sym.test(Fortran::semantics::Symbol::Flag::AccCreate) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyIn) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyInReadOnly) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopy) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyOut) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccDeviceResident))
    return;

  std::stringstream fctName;
  fctName << converter.mangleName(sym) << declarePreDeallocSuffix.str();

  auto *op = beginOpValue.getDefiningOp();
  if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) {
    auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>(
        mlir::acc::getDeclareActionAttrName());
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(), attr.getPreAlloc(),
                    attr.getPostAlloc(),
                    /*preDealloc=*/builder.getSymbolRefAttr(fctName.str()),
                    attr.getPostDealloc()));
  } else {
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(),
                    /*preAlloc=*/{}, /*postAlloc=*/{},
                    /*preDealloc=*/builder.getSymbolRefAttr(fctName.str()),
                    /*postDealloc=*/{}));
  }
}

void Fortran::lower::attachDeclarePostDeallocAction(
    AbstractConverter &converter, fir::FirOpBuilder &builder,
    const Fortran::semantics::Symbol &sym) {
  if (!sym.test(Fortran::semantics::Symbol::Flag::AccCreate) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyIn) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyInReadOnly) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopy) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccCopyOut) &&
      !sym.test(Fortran::semantics::Symbol::Flag::AccDeviceResident))
    return;

  std::stringstream fctName;
  fctName << converter.mangleName(sym) << declarePostDeallocSuffix.str();
  mlir::Operation *op = &builder.getInsertionBlock()->back();
  if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) {
    assert(resOp.getOperands().size() == 0 &&
           "expect only fir.result op with no operand");
    op = op->getPrevNode();
  }
  assert(op && "expect operation to attach the post deallocation action");
  if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) {
    auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>(
        mlir::acc::getDeclareActionAttrName());
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(), attr.getPreAlloc(),
                    attr.getPostAlloc(), attr.getPreDealloc(),
                    /*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
  } else {
    op->setAttr(mlir::acc::getDeclareActionAttrName(),
                mlir::acc::DeclareActionAttr::get(
                    builder.getContext(),
                    /*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{},
                    /*postDealloc=*/builder.getSymbolRefAttr(fctName.str())));
  }
}

void Fortran::lower::genOpenACCTerminator(fir::FirOpBuilder &builder,
                                          mlir::Operation *op,
                                          mlir::Location loc) {
  if (mlir::isa<mlir::acc::ParallelOp, mlir::acc::LoopOp>(op))
    builder.create<mlir::acc::YieldOp>(loc);
  else
    builder.create<mlir::acc::TerminatorOp>(loc);
}

bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) {
  if (builder.getBlock()->getParent()->getParentOfType<mlir::acc::LoopOp>())
    return true;
  return false;
}

void Fortran::lower::setInsertionPointAfterOpenACCLoopIfInside(
    fir::FirOpBuilder &builder) {
  if (auto loopOp =
          builder.getBlock()->getParent()->getParentOfType<mlir::acc::LoopOp>())
    builder.setInsertionPointAfter(loopOp);
}

void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
                                                 mlir::Location loc) {
  mlir::Value yieldValue =
      builder.createIntegerConstant(loc, builder.getI1Type(), 1);
  builder.create<mlir::acc::YieldOp>(loc, yieldValue);
}

int64_t Fortran::lower::getCollapseValue(
    const Fortran::parser::AccClauseList &clauseList) {
  for (const Fortran::parser::AccClause &clause : clauseList.v) {
    if (const auto *collapseClause =
            std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
      const parser::AccCollapseArg &arg = collapseClause->v;
      const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)};
      return *Fortran::semantics::GetIntValue(collapseValue);
    }
  }
  return 1;
}