// llvm/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c

// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run
#include <stdio.h>
#include "omp_testsuite.h"

// When compiler supports num_threads clause list format, remove the following
// and use num_threads clause directly
#if defined(__cplusplus)
extern "C" {
#endif

// Runtime entry point declared by hand because no header in this test exposes
// it. This file only uses its return value as the `gtid` argument of
// __kmpc_push_num_threads_list, always passing NULL for `loc`.
// NOTE(review): presumably returns the calling thread's global thread id --
// confirm against the runtime's own declaration.
int __kmpc_global_thread_num(void *loc);
// Stand-in for a list-form num_threads clause: this file calls it right before
// a `#pragma omp parallel` with `length` entries in `list` (e.g. {3, 3} in
// place of num_threads(3,3)).
// NOTE(review): `loc` is declared as void* here and always passed as NULL;
// the runtime's real parameter types are not visible from this file.
void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length,
                                  int *list);

#if defined(__cplusplus)
}
#endif

// Checks the team size seen at each nesting level when the per-level thread
// count comes from the OMP_NUM_THREADS list, from a scalar num_threads clause,
// or from a list handed to __kmpc_push_num_threads_list.
//
// The expected sizes assume the RUN line's OMP_NUM_THREADS=2,2,2,2,2. Inside
// every parallel region a single thread compares omp_get_num_threads() with
// the expected value and adds 1 to num_failed on a mismatch; the reduction
// clauses sum those counts across threads and nesting levels.
//
// Returns 1 if every check passed, 0 otherwise.
int test_omp_parallel_num_threads_list() {
  int num_failed = 0;

  // Baseline: the environment list gives 2 threads per level. Probe the three
  // outermost levels.
  // Level 1
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  // A plain one-value num_threads clause on the outermost region must change
  // that region only.
  // Level 1
#pragma omp parallel num_threads(4) reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 4);
    // Level 2: still driven by the environment list
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3: still driven by the environment list
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  // Same one-value clause, this time attached to the second level.
  // Level 1: still driven by the environment list
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
    // Level 2
#pragma omp parallel num_threads(4) reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 4);
      // Level 3: still driven by the environment list
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  // A list shorter than the environment list is expected to replace the old
  // settings entirely. The list is pushed through the compiler interface
  // because the clause's list form cannot be written directly yet.
  int list_3_3[2] = {3, 3};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               list_3_3);
  // Level 1: stands in for num_threads(3,3)
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 3);
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 3);
      // Level 3: past the end of the list, so the last entry is expected to
      // keep applying at deeper nesting levels
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 3);
      } // level 3
    } // level 2
  } // level 1

  // The same short list, pushed from inside an outer region instead.
  // Level 1: still driven by the environment list
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
    // Not covered by the single above: every thread of this team pushes the
    // list before forking its own nested region.
    int inner_3_3[2] = {3, 3};
    __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                 inner_3_3);
    // Level 2: stands in for num_threads(3,3)
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 3);
      // Level 3: the last list entry is expected to keep applying
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 3);
      } // level 3
    } // level 2
    // A sibling level-2 region must NOT inherit the pushed list.
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3: back on the environment list as well
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  // Lists pushed at two different nesting depths.
  int list_3_2[2] = {3, 2};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               list_3_2);
  // Level 1: stands in for num_threads(3,2)
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 3);
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
        int list_3_1[2] = {3, 1};
        __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                     list_3_1);
        // Level 4: stands in for num_threads(3,1)
#pragma omp parallel reduction(+ : num_failed)
        {
#pragma omp single
          num_failed += (omp_get_num_threads() != 3);
          // Level 5
#pragma omp parallel reduction(+ : num_failed)
          {
#pragma omp single
            num_failed += (omp_get_num_threads() != 1);
            // Level 6
#pragma omp parallel reduction(+ : num_failed)
            {
#pragma omp single
              num_failed += (omp_get_num_threads() != 1);
            } // level 6
          } // level 5
        } // level 4
        // A sibling level-4 region is expected to get 2 threads, not the 3
        // requested by the {3, 1} list pushed for the region above.
        // Level 4
#pragma omp parallel reduction(+ : num_failed)
        {
#pragma omp single
          num_failed += (omp_get_num_threads() != 2);
        } // level 4
      } // level 3
    } // level 2
    // Second level-2 region under the same outer region.
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  // With no list pending, everything should match the initial baseline again.
  // Level 1
#pragma omp parallel reduction(+ : num_failed)
  {
#pragma omp single
    num_failed += (omp_get_num_threads() != 2);
    // Level 2
#pragma omp parallel reduction(+ : num_failed)
    {
#pragma omp single
      num_failed += (omp_get_num_threads() != 2);
      // Level 3
#pragma omp parallel reduction(+ : num_failed)
      {
#pragma omp single
        num_failed += (omp_get_num_threads() != 2);
      } // level 3
    } // level 2
  } // level 1

  return num_failed == 0;
}

// Runs the test REPETITIONS times and returns the number of failed runs as
// the process exit status (0 means every run passed).
int main() {
  int failed_runs = 0;
  int rep = 0;

  while (rep < REPETITIONS) {
    // The test returns non-zero on success, so its negation is 1 per failure.
    failed_runs += !test_omp_parallel_num_threads_list();
    rep++;
  }
  return failed_runs;
}