// RUN: %libomp-cxx-compile
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
//
// RUN: %libomp-cxx-compile -DUSE_HIDDEN_HELPERS=1
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=0 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=1 KMP_HOT_TEAMS_MODE=1 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=3 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=4 %libomp-run
// RUN: env KMP_HOT_TEAMS_MAX_LEVEL=5 %libomp-run
// This test stresses the task team mechanism by running a simple
// increment task over and over with varying numbers of threads and
// nesting depths. The test covers nested serial teams and mixing serial
// teams with normal active teams.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
// Number of consecutive repetitions of each test configuration; main()
// passes it as the iteration count to run_ntimes().
#define NTIMES 5
// Plain increment task: generates one explicit task that atomically adds
// one to the counter that `a` points to.
void task_inc_a(int *a) {
#pragma omp task
  {
#pragma omp atomic update
    *a += 1;
  }
}
// Binary-splitting increment task. Each call generates one task for the
// inclusive range [low, high]:
//   - a range of more than one element is halved and handed to two
//     recursive calls (each of which generates its own task),
//   - a single-element range atomically adds one to *a,
//   - an empty range (low > high) generates a task that does nothing.
void task_inc_split_a(int *a, int low, int high) {
#pragma omp task firstprivate(low, high)
  {
    if (low < high) {
      const int middle = low + (high - low) / 2;
      task_inc_split_a(a, low, middle);
      task_inc_split_a(a, middle + 1, high);
    } else if (low == high) {
#pragma omp atomic update
      *a += 1;
    }
  }
}
#ifdef USE_HIDDEN_HELPERS
// Increment issued as an asynchronous (nowait) target region that maps the
// counter element to and from the device data environment.
// Hidden helper tasks force serial regions to create task teams.
void task_inc_a_hidden_helper(int *a) {
#pragma omp target nowait map(tofrom : a[0])
  {
#pragma omp atomic update
    *a += 1;
  }
}
#else
// Increment issued as a detached task: the task body atomically adds one to
// *a and then fulfills the event named in its own detach clause.
// Detached tasks force serial regions to create task teams.
void task_inc_a_detached(int *a, omp_event_handle_t handle) {
#pragma omp task detach(handle)
  {
#pragma omp atomic update
    *a += 1;
    omp_fulfill_event(handle);
  }
}
#endif
// Verifies that the counter *a holds `expected`. On a mismatch a diagnostic
// is written to stderr and the process exits with EXIT_FAILURE; on a match
// the function simply returns.
void check_a(int *a, int expected) {
  if (*a == expected)
    return;

  fprintf(stderr,
          "FAIL: a = %d instead of expected = %d. Compile with "
          "-DVERBOSE for more verbose output.\n",
          *a, expected);
  exit(EXIT_FAILURE);
}
// Every thread creates a single "increment" task, three times over, with
// the shared counter verified between the phases.
//   handles  = array of event handles indexed by thread number; only read
//              when USE_HIDDEN_HELPERS is not defined
//   expected = value the counter must reach after the first phase; the
//              callers in this file pass the requested team size
//   a        = shared counter; the checks below assume it is 0 on entry
//              (the callers in this file initialize it that way)
// The barriers separate task creation from verification, so the order of
// the statements below must not be changed.
void test_tasks(omp_event_handle_t *handles, int expected, int *a) {
int tid = omp_get_thread_num();
// Phase 1: one regular increment task per thread.
task_inc_a(a);
#pragma omp barrier
// Past the barrier the counter is expected to hold one increment per thread.
check_a(a, expected);
#pragma omp barrier
// No task was created since the previous check, so the value must be
// unchanged.
check_a(a, expected);
#pragma omp barrier
// Phase 2: one hidden helper (target nowait) or detached increment task
// per thread, depending on how the test was compiled.
#ifdef USE_HIDDEN_HELPERS
task_inc_a_hidden_helper(a);
#else
task_inc_a_detached(a, handles[tid]);
#endif
#pragma omp barrier
check_a(a, 2 * expected);
// This barrier keeps phase 3 tasks from running while another thread is
// still performing the phase 2 check.
#pragma omp barrier
// Phase 3: regular increment tasks again.
task_inc_a(a);
#pragma omp barrier
check_a(a, 3 * expected);
}
// Testing single level of parallelism with increment tasks.
//   nthreads = thread count requested for the parallel region; also the
//              number of event handles allocated and the per-phase counter
//              value that test_tasks() verifies
// Exits with EXIT_FAILURE if the event handle array cannot be allocated.
void test_base(int nthreads) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_base(%d)\n", nthreads);
#endif
  int a = 0;
  omp_event_handle_t *handles =
      (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
  // Every thread indexes handles[] by its thread number in the detached-task
  // build, so a failed allocation must not reach the parallel region.
  if (handles == NULL) {
    fprintf(stderr, "FAIL: unable to allocate %d event handles\n", nthreads);
    exit(EXIT_FAILURE);
  }
#pragma omp parallel num_threads(nthreads) shared(a)
  { test_tasks(handles, nthreads, &a); }
  free(handles);
}
// Nested parallelism with increment tasks: every thread of the outer team
// runs test_base(), which opens the inner parallel region.
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the nested parallel region
void test_nest(int first, int second) {
#if defined(VERBOSE)
#pragma omp master
  { printf(" test_nest(%d, %d)\n", first, second); }
#endif

#pragma omp parallel num_threads(first)
  {
    test_base(second);
  }
}
// Two levels of nesting below the outer parallel region, with increment
// tasks at the innermost level: every thread of the outer team runs
// test_nest().
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the first nested parallel region
//   third  = thread count requested for the second nested parallel region
void test_nest2(int first, int second, int third) {
#if defined(VERBOSE)
#pragma omp master
  { printf(" test_nest2(%d, %d, %d)\n", first, second, third); }
#endif

#pragma omp parallel num_threads(first)
  {
    test_nest(second, third);
  }
}
// Three levels of nesting below the outer parallel region, with increment
// tasks at the innermost level: every thread of the outer team runs
// test_nest2().
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the first nested parallel region
//   third  = thread count requested for the second nested parallel region
//   fourth = thread count requested for the third nested parallel region
void test_nest3(int first, int second, int third, int fourth) {
#if defined(VERBOSE)
#pragma omp master
  { printf(" test_nest3(%d, %d, %d, %d)\n", first, second, third, fourth); }
#endif

#pragma omp parallel num_threads(first)
  {
    test_nest2(second, third, fourth);
  }
}
// Four levels of nesting below the outer parallel region, with increment
// tasks at the innermost level: every thread of the outer team runs
// test_nest3().
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the first nested parallel region
//   third  = thread count requested for the second nested parallel region
//   fourth = thread count requested for the third nested parallel region
//   fifth  = thread count requested for the fourth nested parallel region
void test_nest4(int first, int second, int third, int fourth, int fifth) {
#if defined(VERBOSE)
#pragma omp master
  {
    printf("test_nest4(%d, %d, %d, %d, %d)\n", first, second, third, fourth,
           fifth);
  }
#endif

#pragma omp parallel num_threads(first)
  {
    test_nest3(second, third, fourth, fifth);
  }
}
// Single thread starts a binary splitting "increment" task
// Detached tasks are still single "increment" task
//   handles  = array of event handles indexed by thread number; only read
//              when USE_HIDDEN_HELPERS is not defined
//   expected = number of increments each phase must contribute; the caller
//              in this file passes the requested team size
//   a        = shared counter; the checks below assume it is 0 on entry
//              (the caller in this file initializes it that way)
// The barriers separate task creation from verification, so the order of
// the statements below must not be changed.
void test_tasks_split(omp_event_handle_t *handles, int expected, int *a) {
int tid = omp_get_thread_num();
// Phase 1: one thread seeds the splitting task over the range
// [1, expected], which yields `expected` increments in total.
#pragma omp single
task_inc_split_a(a, 1, expected); // task team A
#pragma omp barrier
check_a(a, expected);
#pragma omp barrier
// No task was created since the previous check, so the value must be
// unchanged.
check_a(a, expected);
#pragma omp barrier
// Phase 2: every thread issues one hidden helper (target nowait) or
// detached increment task, depending on how the test was compiled.
#ifdef USE_HIDDEN_HELPERS
task_inc_a_hidden_helper(a);
#else
task_inc_a_detached(a, handles[tid]);
#endif
#pragma omp barrier
check_a(a, 2 * expected);
// This barrier keeps phase 3 tasks from running while another thread is
// still performing the phase 2 check.
#pragma omp barrier
// Phase 3: splitting task again, seeded by a single thread.
#pragma omp single
task_inc_split_a(a, 1, expected); // task team B
#pragma omp barrier
check_a(a, 3 * expected);
}
// Testing single level of parallelism with splitting incrementing tasks.
//   nthreads = thread count requested for the parallel region; also the
//              number of event handles allocated and the per-phase
//              increment count that test_tasks_split() verifies
// Exits with EXIT_FAILURE if the event handle array cannot be allocated.
void test_base_split(int nthreads) {
#ifdef VERBOSE
#pragma omp master
  printf(" test_base_split(%d)\n", nthreads);
#endif
  int a = 0;
  omp_event_handle_t *handles =
      (omp_event_handle_t *)malloc(sizeof(omp_event_handle_t) * nthreads);
  // Every thread indexes handles[] by its thread number in the detached-task
  // build, so a failed allocation must not reach the parallel region.
  if (handles == NULL) {
    fprintf(stderr, "FAIL: unable to allocate %d event handles\n", nthreads);
    exit(EXIT_FAILURE);
  }
#pragma omp parallel num_threads(nthreads) shared(a)
  { test_tasks_split(handles, nthreads, &a); }
  free(handles);
}
// Nested parallelism with splitting tasks: every thread of the outer team
// runs test_base_split(), which opens the inner parallel region.
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the nested parallel region
void test_nest_split(int first, int second) {
#if defined(VERBOSE)
#pragma omp master
  { printf(" test_nest_split(%d, %d)\n", first, second); }
#endif

#pragma omp parallel num_threads(first)
  {
    test_base_split(second);
  }
}
// Doubly nested parallelism with splitting tasks: every thread of the outer
// team runs test_nest_split().
//   first  = thread count requested for the outer parallel region
//   second = thread count requested for the first nested parallel region
//   third  = thread count requested for the second nested parallel region
void test_nest2_split(int first, int second, int third) {
#if defined(VERBOSE)
#pragma omp master
  { printf("test_nest2_split(%d, %d, %d)\n", first, second, third); }
#endif

#pragma omp parallel num_threads(first)
  {
    test_nest_split(second, third);
  }
}
// Invokes func(args...) n times in a row with the same arguments.
// Nothing is called when n is zero or negative.
template <typename... Args>
void run_ntimes(int n, void (*func)(Args...), Args... args) {
  for (int remaining = n; remaining > 0; --remaining)
    func(args...);
}
// Test driver. Runs every configuration NTIMES in a row via run_ntimes(),
// prints "PASS" and returns EXIT_SUCCESS when all of them complete;
// check_a() terminates the process earlier on a wrong counter value.
// The requested team sizes change from one configuration to the next, so
// the order of the calls is part of the test.
int main() {
// The deepest configuration (test_nest4) nests five parallel regions.
omp_set_max_active_levels(5);
// Single parallel level, regular increment tasks.
run_ntimes(NTIMES, test_base, 4);
run_ntimes(NTIMES, test_base, 1);
run_ntimes(NTIMES, test_base, 8);
run_ntimes(NTIMES, test_base, 2);
run_ntimes(NTIMES, test_base, 6);
// Two parallel levels.
run_ntimes(NTIMES, test_nest, 1, 1);
run_ntimes(NTIMES, test_nest, 1, 5);
run_ntimes(NTIMES, test_nest, 2, 6);
run_ntimes(NTIMES, test_nest, 1, 1);
run_ntimes(NTIMES, test_nest, 4, 3);
run_ntimes(NTIMES, test_nest, 3, 2);
run_ntimes(NTIMES, test_nest, 1, 1);
// Three parallel levels.
run_ntimes(NTIMES, test_nest2, 1, 1, 2);
run_ntimes(NTIMES, test_nest2, 1, 2, 1);
run_ntimes(NTIMES, test_nest2, 2, 2, 1);
run_ntimes(NTIMES, test_nest2, 2, 1, 1);
run_ntimes(NTIMES, test_nest2, 4, 2, 1);
run_ntimes(NTIMES, test_nest2, 4, 2, 2);
run_ntimes(NTIMES, test_nest2, 1, 1, 1);
run_ntimes(NTIMES, test_nest2, 4, 2, 2);
// Four parallel levels.
run_ntimes(NTIMES, test_nest3, 1, 1, 1, 1);
run_ntimes(NTIMES, test_nest3, 1, 2, 1, 1);
run_ntimes(NTIMES, test_nest3, 1, 1, 2, 1);
run_ntimes(NTIMES, test_nest3, 1, 1, 1, 2);
run_ntimes(NTIMES, test_nest3, 2, 1, 1, 1);
// Five parallel levels.
run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
run_ntimes(NTIMES, test_nest4, 2, 1, 1, 1, 1);
run_ntimes(NTIMES, test_nest4, 1, 2, 1, 1, 1);
run_ntimes(NTIMES, test_nest4, 1, 1, 2, 1, 1);
run_ntimes(NTIMES, test_nest4, 1, 1, 1, 2, 1);
run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 2);
run_ntimes(NTIMES, test_nest4, 1, 1, 1, 1, 1);
run_ntimes(NTIMES, test_nest4, 1, 2, 1, 2, 1);
// Single parallel level, binary splitting tasks.
run_ntimes(NTIMES, test_base_split, 4);
run_ntimes(NTIMES, test_base_split, 2);
run_ntimes(NTIMES, test_base_split, 7);
run_ntimes(NTIMES, test_base_split, 1);
// Two parallel levels, binary splitting tasks.
run_ntimes(NTIMES, test_nest_split, 4, 2);
run_ntimes(NTIMES, test_nest_split, 2, 1);
run_ntimes(NTIMES, test_nest_split, 7, 2);
run_ntimes(NTIMES, test_nest_split, 1, 1);
run_ntimes(NTIMES, test_nest_split, 1, 4);
// Three parallel levels, binary splitting tasks.
run_ntimes(NTIMES, test_nest2_split, 1, 1, 2);
run_ntimes(NTIMES, test_nest2_split, 1, 2, 1);
run_ntimes(NTIMES, test_nest2_split, 2, 2, 1);
run_ntimes(NTIMES, test_nest2_split, 2, 1, 1);
run_ntimes(NTIMES, test_nest2_split, 4, 2, 1);
run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
run_ntimes(NTIMES, test_nest2_split, 1, 1, 1);
run_ntimes(NTIMES, test_nest2_split, 4, 2, 2);
printf("PASS\n");
return EXIT_SUCCESS;
}