llvm/openmp/runtime/src/kmp_collapse.cpp

/*
 * kmp_collapse.cpp -- loop collapse feature
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_collapse.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// OMPTODO: different style of comments (see kmp_sched)
// OMPTODO: OMPT/OMPD

// avoid inadevertently using a library based abs
template <typename T> T __kmp_abs(const T val) {}
kmp_uint32 __kmp_abs(const kmp_uint32 val) {}
kmp_uint64 __kmp_abs(const kmp_uint64 val) {}

//----------------------------------------------------------------------------
// Common functions for working with rectangular and non-rectangular loops
//----------------------------------------------------------------------------

template <typename T> int __kmp_sign(T val) {}

template <typename T> class CollapseAllocator {};

//----------Loop canonicalization---------------------------------------------

// For loop nest (any shape):
// convert != to < or >;
// switch from using < or > to <= or >=.
// "bounds" array has to be allocated per thread.
// All other internal functions will work only with canonicalized loops.
template <typename T>
void kmp_canonicalize_one_loop_XX(
    ident_t *loc,
    /*in/out*/ bounds_infoXX_template<T> *bounds) {}

// Canonicalize loop nest. original_bounds_nest is an array of length n.
void kmp_canonicalize_loop_nest(ident_t *loc,
                                /*in/out*/ bounds_info_t *original_bounds_nest,
                                kmp_index_t n) {}

//----------Calculating trip count on one level-------------------------------

// Calculate trip count on this loop level.
// We do this either for a rectangular loop nest,
// or after an adjustment bringing the loops to a parallelepiped shape.
// This number should not depend on the value of outer IV
// even if the formular has lb1 and ub1.
// Note: for non-rectangular loops don't use span for this, it's too big.

template <typename T>
kmp_loop_nest_iv_t kmp_calculate_trip_count_XX(
    /*in/out*/ bounds_infoXX_template<T> *bounds) {}

// Calculate trip count on this loop level.
kmp_loop_nest_iv_t kmp_calculate_trip_count(/*in/out*/ bounds_info_t *bounds) {}

//----------Trim original iv according to its type----------------------------

// Trim original iv according to its type.
// Return kmp_uint64 value which can be easily used in all internal calculations
// And can be statically cast back to original type in user code.
kmp_uint64 kmp_fix_iv(loop_type_t loop_iv_type, kmp_uint64 original_iv) {}

//----------Compare two IVs (remember they have a type)-----------------------

bool kmp_ivs_eq(loop_type_t loop_iv_type, kmp_uint64 original_iv1,
                kmp_uint64 original_iv2) {}

//----------Calculate original iv on one level--------------------------------

// Return true if the point fits into upper bounds on this level,
// false otherwise
template <typename T>
bool kmp_iv_is_in_upper_bound_XX(const bounds_infoXX_template<T> *bounds,
                                 const kmp_point_t original_ivs,
                                 kmp_index_t ind) {}

// Calculate one iv corresponding to iteration on the level ind.
// Return true if it fits into lower-upper bounds on this level
// (if not, we need to re-calculate)
template <typename T>
bool kmp_calc_one_iv_XX(const bounds_infoXX_template<T> *bounds,
                        /*in/out*/ kmp_point_t original_ivs,
                        const kmp_iterations_t iterations, kmp_index_t ind,
                        bool start_with_lower_bound, bool checkBounds) {}

bool kmp_calc_one_iv(const bounds_info_t *bounds,
                     /*in/out*/ kmp_point_t original_ivs,
                     const kmp_iterations_t iterations, kmp_index_t ind,
                     bool start_with_lower_bound, bool checkBounds) {}

//----------Calculate original iv on one level for rectangular loop nest------

// Calculate one iv corresponding to iteration on the level ind.
// Return true if it fits into lower-upper bounds on this level
// (if not, we need to re-calculate)
template <typename T>
void kmp_calc_one_iv_rectang_XX(const bounds_infoXX_template<T> *bounds,
                                /*in/out*/ kmp_uint64 *original_ivs,
                                const kmp_iterations_t iterations,
                                kmp_index_t ind) {}

void kmp_calc_one_iv_rectang(const bounds_info_t *bounds,
                             /*in/out*/ kmp_uint64 *original_ivs,
                             const kmp_iterations_t iterations,
                             kmp_index_t ind) {}

//----------------------------------------------------------------------------
// Rectangular loop nest
//----------------------------------------------------------------------------

//----------Canonicalize loop nest and calculate trip count-------------------

// Canonicalize loop nest and calculate overall trip count.
// "bounds_nest" has to be allocated per thread.
// API will modify original bounds_nest array to bring it to a canonical form
// (only <= and >=, no !=, <, >). If the original loop nest was already in a
// canonical form there will be no changes to bounds in bounds_nest array
// (only trip counts will be calculated).
// Returns trip count of overall space.
extern "C" kmp_loop_nest_iv_t
__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
                                 /*in/out*/ bounds_info_t *original_bounds_nest,
                                 kmp_index_t n) {}

//----------Calculate old induction variables---------------------------------

// Calculate old induction variables corresponding to overall new_iv.
// Note: original IV will be returned as if it had kmp_uint64 type,
// will have to be converted to original type in user code.
// Note: trip counts should be already calculated by
// __kmpc_process_loop_nest_rectang.
// OMPTODO: special case 2, 3 nested loops: either do different
// interface without array or possibly template this over n
extern "C" void
__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
                                 const bounds_info_t *original_bounds_nest,
                                 /*out*/ kmp_uint64 *original_ivs,
                                 kmp_index_t n) {}

//----------------------------------------------------------------------------
// Non-rectangular loop nest
//----------------------------------------------------------------------------

//----------Calculate maximum possible span of iv values on one level---------

// Calculate span for IV on this loop level for "<=" case.
// Note: it's for <= on this loop nest level, so lower bound should be smallest
// value, upper bound should be the biggest value. If the loop won't execute,
// 'smallest' may be bigger than 'biggest', but we'd better not switch them
// around.
template <typename T>
void kmp_calc_span_lessoreq_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /* in/out*/ bounds_info_internal_t *bounds_nest) {}

// Calculate span for IV on this loop level for ">=" case.
template <typename T>
void kmp_calc_span_greateroreq_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /* in/out*/ bounds_info_internal_t *bounds_nest) {}

// Calculate maximum possible span for IV on this loop level.
template <typename T>
void kmp_calc_span_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /* in/out*/ bounds_info_internal_t *bounds_nest) {}

//----------All initial processing of the loop nest---------------------------

// Calculate new bounds for this loop level.
// To be able to work with the nest we need to get it to a parallelepiped shape.
// We need to stay in the original range of values, so that there will be no
// overflow, for that we'll adjust both upper and lower bounds as needed.
template <typename T>
void kmp_calc_new_bounds_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /* in/out*/ bounds_info_internal_t *bounds_nest) {}

// Do all processing for one canonicalized loop in the nest
// (assuming that outer loops already were processed):
template <typename T>
kmp_loop_nest_iv_t kmp_process_one_loop_XX(
    /* in/out*/ bounds_info_internalXX_template<T> *bounds,
    /*in/out*/ bounds_info_internal_t *bounds_nest) {}

// Non-rectangular loop nest, canonicalized to use <= or >=.
// Process loop nest to have a parallelepiped shape,
// calculate biggest spans for IV's on all levels and calculate overall trip
// count. "bounds_nest" has to be allocated per thread.
// Returns overall trip count (for adjusted space).
kmp_loop_nest_iv_t kmp_process_loop_nest(
    /*in/out*/ bounds_info_internal_t *bounds_nest, kmp_index_t n) {}

//----------Calculate iterations (in the original or updated space)-----------

// Calculate number of iterations in original or updated space resulting in
// original_ivs[ind] (only on this level, non-negative)
// (not counting initial iteration)
template <typename T>
kmp_loop_nest_iv_t
kmp_calc_number_of_iterations_XX(const bounds_infoXX_template<T> *bounds,
                                 const kmp_point_t original_ivs,
                                 kmp_index_t ind) {}

// Calculate number of iterations in the original or updated space resulting in
// original_ivs[ind] (only on this level, non-negative)
kmp_loop_nest_iv_t kmp_calc_number_of_iterations(const bounds_info_t *bounds,
                                                 const kmp_point_t original_ivs,
                                                 kmp_index_t ind) {}

//----------Calculate new iv corresponding to original ivs--------------------

// We got a point in the original loop nest.
// Take updated bounds and calculate what new_iv will correspond to this point.
// When we are getting original IVs from new_iv, we have to adjust to fit into
// original loops bounds. Getting new_iv for the adjusted original IVs will help
// with making more chunks non-empty.
kmp_loop_nest_iv_t
kmp_calc_new_iv_from_original_ivs(const bounds_info_internal_t *bounds_nest,
                                  const kmp_point_t original_ivs,
                                  kmp_index_t n) {}

//----------Calculate original ivs for provided iterations--------------------

// Calculate original IVs for provided iterations, assuming iterations are
// calculated in the original space.
// Loop nest is in canonical form (with <= / >=).
bool kmp_calc_original_ivs_from_iterations(
    const bounds_info_t *original_bounds_nest, kmp_index_t n,
    /*in/out*/ kmp_point_t original_ivs,
    /*in/out*/ kmp_iterations_t iterations, kmp_index_t ind) {}

//----------Calculate original ivs for the beginning of the loop nest---------

// Calculate IVs for the beginning of the loop nest.
// Note: lower bounds of all loops may not work -
// if on some of the iterations of the outer loops inner loops are empty.
// Loop nest is in canonical form (with <= / >=).
bool kmp_calc_original_ivs_for_start(const bounds_info_t *original_bounds_nest,
                                     kmp_index_t n,
                                     /*out*/ kmp_point_t original_ivs) {}

//----------Calculate next point in the original loop space-------------------

// From current set of original IVs calculate next point.
// Return false if there is no next point in the loop bounds.
bool kmp_calc_next_original_ivs(const bounds_info_t *original_bounds_nest,
                                kmp_index_t n, const kmp_point_t original_ivs,
                                /*out*/ kmp_point_t next_original_ivs) {}

//----------Calculate chunk end in the original loop space--------------------

// For one level calculate old induction variable corresponding to overall
// new_iv for the chunk end.
// Return true if it fits into upper bound on this level
// (if not, we need to re-calculate)
template <typename T>
bool kmp_calc_one_iv_for_chunk_end_XX(
    const bounds_infoXX_template<T> *bounds,
    const bounds_infoXX_template<T> *updated_bounds,
    /*in/out*/ kmp_point_t original_ivs, const kmp_iterations_t iterations,
    kmp_index_t ind, bool start_with_lower_bound, bool compare_with_start,
    const kmp_point_t original_ivs_start) {}

// For one level calculate old induction variable corresponding to overall
// new_iv for the chunk end.
bool kmp_calc_one_iv_for_chunk_end(const bounds_info_t *bounds,
                                   const bounds_info_t *updated_bounds,
                                   /*in/out*/ kmp_point_t original_ivs,
                                   const kmp_iterations_t iterations,
                                   kmp_index_t ind, bool start_with_lower_bound,
                                   bool compare_with_start,
                                   const kmp_point_t original_ivs_start) {}

// Calculate old induction variables corresponding to overall new_iv for the
// chunk end. If due to space extension we are getting old IVs outside of the
// boundaries, bring them into the boundaries. Need to do this in the runtime,
// esp. on the lower bounds side. When getting result need to make sure that the
// new chunk starts at next position to old chunk, not overlaps with it (this is
// done elsewhere), and need to make sure end of the chunk is further than the
// beginning of the chunk. We don't need an exact ending point here, just
// something more-or-less close to the desired chunk length, bigger is fine
// (smaller would be fine, but we risk going into infinite loop, so do smaller
// only at the very end of the space). result: false if could not find the
// ending point in the original loop space. In this case the caller can use
// original upper bounds as the end of the chunk. Chunk won't be empty, because
// it'll have at least the starting point, which is by construction in the
// original space.
bool kmp_calc_original_ivs_for_chunk_end(
    const bounds_info_t *original_bounds_nest, kmp_index_t n,
    const bounds_info_internal_t *updated_bounds_nest,
    const kmp_point_t original_ivs_start, kmp_loop_nest_iv_t new_iv,
    /*out*/ kmp_point_t original_ivs) {}

//----------Calculate upper bounds for the last chunk-------------------------

// Calculate one upper bound for the end.
template <typename T>
void kmp_calc_one_iv_end_XX(const bounds_infoXX_template<T> *bounds,
                            /*in/out*/ kmp_point_t original_ivs,
                            kmp_index_t ind) {}

void kmp_calc_one_iv_end(const bounds_info_t *bounds,
                         /*in/out*/ kmp_point_t original_ivs, kmp_index_t ind) {}

// Calculate upper bounds for the last loop iteration. Just use original upper
// bounds (adjusted when canonicalized to use <= / >=). No need to check that
// this point is in the original space (it's likely not)
void kmp_calc_original_ivs_for_end(
    const bounds_info_t *const original_bounds_nest, kmp_index_t n,
    /*out*/ kmp_point_t original_ivs) {}

/**************************************************************************
 * Identify nested loop structure - loops come in the canonical form
 * Lower triangle matrix: i = 0; i <= N; i++        {0,0}:{N,0}
 *                        j = 0; j <= 0/-1+1*i; j++ {0,0}:{0/-1,1}
 * Upper Triangle matrix
 *                        i = 0;     i <= N; i++    {0,0}:{N,0}
 *                        j = 0+1*i; j <= N; j++    {0,1}:{N,0}
 * ************************************************************************/
nested_loop_type_t
kmp_identify_nested_loop_structure(/*in*/ bounds_info_t *original_bounds_nest,
                                   /*in*/ kmp_index_t n) {}

/**************************************************************************
 * SQRT Approximation: https://math.mit.edu/~stevenj/18.335/newton-sqrt.pdf
 * Start point is x so the result is always > sqrt(x)
 * The method has uniform convergence, PRECISION is set to 0.1
 * ************************************************************************/
#define level_of_precision
double sqrt_newton_approx(/*in*/ kmp_uint64 x) {}

/**************************************************************************
 *  Handle lower triangle matrix in the canonical form
 *  i = 0; i <= N; i++          {0,0}:{N,0}
 *  j = 0; j <= 0/-1 + 1*i; j++ {0,0}:{0/-1,1}
 * ************************************************************************/
void kmp_handle_lower_triangle_matrix(
    /*in*/ kmp_uint32 nth,
    /*in*/ kmp_uint32 tid,
    /*in */ kmp_index_t n,
    /*in/out*/ bounds_info_t *original_bounds_nest,
    /*out*/ bounds_info_t *chunk_bounds_nest) {}

/**************************************************************************
 *  Handle upper triangle matrix in the canonical form
 *  i = 0; i <= N; i++     {0,0}:{N,0}
 *  j = 0+1*i; j <= N; j++ {0,1}:{N,0}
 * ************************************************************************/
void kmp_handle_upper_triangle_matrix(
    /*in*/ kmp_uint32 nth,
    /*in*/ kmp_uint32 tid,
    /*in */ kmp_index_t n,
    /*in/out*/ bounds_info_t *original_bounds_nest,
    /*out*/ bounds_info_t *chunk_bounds_nest) {}
//----------Init API for non-rectangular loops--------------------------------

// Init API for collapsed loops (static, no chunks defined).
// "bounds_nest" has to be allocated per thread.
// API will modify original bounds_nest array to bring it to a canonical form
// (only <= and >=, no !=, <, >). If the original loop nest was already in a
// canonical form there will be no changes to bounds in bounds_nest array
// (only trip counts will be calculated). Internally API will expand the space
// to parallelogram/parallelepiped, calculate total, calculate bounds for the
// chunks in terms of the new IV, re-calc them in terms of old IVs (especially
// important on the left side, to hit the lower bounds and not step over), and
// pick the correct chunk for this thread (so it will calculate chunks up to the
// needed one). It could be optimized to calculate just this chunk, potentially
// a bit less well distributed among threads. It is designed to make sure that
// threads will receive predictable chunks, deterministically (so that next nest
// of loops with similar characteristics will get exactly same chunks on same
// threads).
// Current contract: chunk_bounds_nest has only lb0 and ub0,
// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
extern "C" kmp_int32
__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
                          /*in/out*/ bounds_info_t *original_bounds_nest,
                          /*out*/ bounds_info_t *chunk_bounds_nest,
                          kmp_index_t n, /*out*/ kmp_int32 *plastiter) {}