llvm/openmp/runtime/src/kmp_collapse.h

/*
 * kmp_collapse.h -- header for loop collapse feature
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_COLLAPSE_H
#define KMP_COLLAPSE_H

#include <type_traits>

// Type of the index into the loop nest structures
// (with values from 0 to less than n from collapse(n))
kmp_index_t;

// Type for combined loop nest space IV:
kmp_loop_nest_iv_t;

// Comparison operator used by a loop's termination test (<, <=, etc.).
// Values are stored with kmp_int32 as the underlying type.
// NOTE(review): no enumerators are listed here, although the APIs in this
// header talk about <=, >=, !=, < and > -- confirm whether the enumerator
// list was dropped.
enum comparison_t : kmp_int32 {};

// Type of a loop's induction variable (IV).
// After the usual promotions, the types of the bounds and of the step
// are a subset of these types (32 & 64 bit only).
// Values are stored with kmp_int32 as the underlying type.
// NOTE(review): no enumerators are listed here -- confirm whether the
// enumerator list was dropped.
enum loop_type_t : kmp_int32 {};

// Classification of a loop nest, used to handle special cases.
// Values are stored with kmp_int32 as the underlying type.
// NOTE(review): no enumerators are listed here, so the special cases are not
// identifiable from this declaration -- confirm whether the list was dropped.
enum nested_loop_type_t : kmp_int32 {};

/*!
 @ingroup WORK_SHARING
 * Describes the structure for rectangular nested loops.
 *
 * Typed counterpart of the bounds_info_t interface struct; bounds_info_t is
 * documented as having the same size as this template.
 * NOTE(review): T is presumably the loop IV type -- confirm. No members are
 * declared here; confirm whether the member list was dropped.
 */
template <typename T> struct bounds_infoXX_template {};

/*!
 @ingroup WORK_SHARING
 * Interface struct for rectangular nested loops.
 * Same size as bounds_infoXX_template.
 *
 * This is the type the extern "C" entry points in this header take for each
 * level of the loop nest (passed as an array of bounds_info_t).
 * NOTE(review): no members are declared here -- confirm whether the member
 * list was dropped.
 */
struct bounds_info_t {};

//-------------------------------------------------------------------------
// Additional types for internal representation:

// Array for a point in the loop space, in the original space.
// It's represented in kmp_uint64, but each dimension is calculated in
// that loop IV type. Also dimensions have to be converted to those types
// when used in generated code.
kmp_point_t;

// Array: Number of loop iterations on each nesting level needed to reach some
// point, in expanded space or in original space.
// OMPTODO: move from using iterations to using offsets (iterations multiplied
// by steps). For those we need to be careful with the types, as step can be
// negative, but it'll remove multiplications and divisions in several places.
kmp_iterations_t;

// Internal (runtime-only) struct carrying additional info for a loop level.
// NOTE(review): T is presumably the loop IV type, mirroring
// bounds_infoXX_template -- confirm. No members are declared here; confirm
// whether the member list was dropped.
template <typename T> struct bounds_info_internalXX_template {};

// Internal (runtime-only) struct with additional info.
// NOTE(review): presumably the untyped counterpart of
// bounds_info_internalXX_template, as bounds_info_t is for
// bounds_infoXX_template -- confirm. No members are declared here.
struct bounds_info_internal_t {};

//----------APIs for rectangular loop nests--------------------------------

/*!
 @ingroup WORK_SHARING
 * Canonicalizes a rectangular loop nest and computes its overall trip count.
 *
 * The bounds in @p original_bounds_nest are rewritten in place so that only
 * <= and >= comparisons remain (no !=, <, >). If the nest was already in
 * canonical form the bounds are left untouched and only the trip counts are
 * calculated. The "bounds_nest" array has to be allocated per thread.
 *
 * @param loc   NOTE(review): presumably source location info -- confirm.
 * @param gtid  NOTE(review): presumably the global thread id -- confirm.
 * @param original_bounds_nest  [in/out] per-level bounds of the loop nest.
 * @param n     NOTE(review): presumably the number of loops in the nest
 *              (the n of collapse(n)) -- confirm.
 * @return Trip count of the overall space.
 */
extern "C" kmp_loop_nest_iv_t __kmpc_process_loop_nest_rectang(
    ident_t *loc, kmp_int32 gtid,
    /*in/out*/ bounds_info_t *original_bounds_nest, kmp_index_t n);

/*!
 @ingroup WORK_SHARING
 * Computes the original induction variables that correspond to the overall
 * (collapsed) induction variable @p new_iv.
 *
 * Each original IV is returned as if it had kmp_uint64 type; user code has to
 * convert it back to the loop's original type. The trip counts must already
 * have been calculated by __kmpc_process_loop_nest_rectang.
 *
 * OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
 * that into user code.
 *
 * @param loc   NOTE(review): presumably source location info -- confirm.
 * @param new_iv  IV of the combined loop nest space.
 * @param original_bounds_nest  per-level bounds of the loop nest (read-only).
 * @param original_ivs  [out] receives the original IVs as kmp_uint64 values.
 * @param n     NOTE(review): presumably the number of loops in the nest
 *              (the n of collapse(n)) -- confirm.
 */
extern "C" void __kmpc_calc_original_ivs_rectang(
    ident_t *loc, kmp_loop_nest_iv_t new_iv,
    const bounds_info_t *original_bounds_nest,
    /*out*/ kmp_uint64 *original_ivs, kmp_index_t n);

//----------Init API for non-rectangular loops--------------------------------

/*!
 @ingroup WORK_SHARING
 * Init API for collapsed loops (static, no chunks defined).
 *
 * The "bounds_nest" array has to be allocated per thread. The bounds in
 * @p original_bounds_nest are rewritten in place so that only <= and >=
 * comparisons remain (no !=, <, >); if the nest was already in canonical form
 * the bounds are left untouched and only the trip counts are calculated.
 *
 * Internally the API:
 *  - expands the space to a parallelogram/parallelepiped,
 *  - calculates the total,
 *  - calculates bounds for the chunks in terms of the new IV,
 *  - re-calculates them in terms of the old IVs (especially important on the
 *    left side, to hit the lower bounds and not step over),
 *  - picks the correct chunk for this thread, calculating chunks up to the
 *    needed one.
 *
 * It could be optimized to calculate just this thread's chunk, potentially
 * with a bit less even distribution among threads. It is designed so that
 * threads receive predictable chunks, deterministically: the next loop nest
 * with similar characteristics gets exactly the same chunks on the same
 * threads.
 *
 * Current contract: chunk_bounds_nest has only lb0 and ub0; lb1 and ub1 are
 * set to 0 and can be ignored. (This may change in the future).
 *
 * @param loc   NOTE(review): presumably source location info -- confirm.
 * @param gtid  NOTE(review): presumably the global thread id -- confirm.
 * @param original_bounds_nest  [in/out] per-level bounds of the loop nest.
 * @param chunk_bounds_nest  [out] bounds of the chunk picked for this thread.
 * @param n     NOTE(review): presumably the number of loops in the nest
 *              (the n of collapse(n)) -- confirm.
 * @param plastiter  [out] NOTE(review): presumably a last-iteration flag --
 *              confirm.
 * @return NOTE(review): meaning of the kmp_int32 result is not documented
 *         here -- confirm against the implementation.
 */
extern "C" kmp_int32 __kmpc_for_collapsed_init(
    ident_t *loc, kmp_int32 gtid,
    /*in/out*/ bounds_info_t *original_bounds_nest,
    /*out*/ bounds_info_t *chunk_bounds_nest, kmp_index_t n,
    /*out*/ kmp_int32 *plastiter);

#endif // KMP_COLLAPSE_H