llvm/openmp/runtime/test/worksharing/for/omp_for_schedule_guided.c

// RUN: %libomp-compile-and-run

/* Test for guided scheduling
 * Ensure threads get chunks interleavely first
 * Then judge the chunk sizes are decreasing to a stable value
 * Modified by Chunhua Liao
 * For example, 100 iteration on 2 threads, chunksize 7
 * one line for each dispatch, 0/1 means thread id
 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  24
 * 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1        18
 * 0 0 0 0 0 0 0 0 0 0 0 0 0 0            14
 * 1 1 1 1 1 1 1 1 1 1                10
 * 0 0 0 0 0 0 0 0                   8
 * 1 1 1 1 1 1 1                   7
 * 0 0 0 0 0 0 0                   7
 * 1 1 1 1 1 1 1                   7
 * 0 0 0 0 0                     5
*/
#include <stdio.h>
#include <stdlib.h>
#include "omp_testsuite.h"
#include "omp_my_sleep.h"

#define CFSMAX_SIZE 1000
#define MAX_TIME  0.005

#ifdef SLEEPTIME
#undef SLEEPTIME
#define SLEEPTIME 0.0001
#endif

int test_omp_for_schedule_guided()
{
  int * tids;
  int * chunksizes;
  int notout;
  int maxiter;
  int threads;
  int i;
  int result;

  tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
  maxiter = 0;
  result = 1;
  notout = 1;

  /* Testing if enough threads are available for this check. */
  #pragma omp parallel
  {
    #pragma omp single
    {
      threads = omp_get_num_threads();
    }
  }

  /* ensure there are at least two threads */
  if (threads < 2) {
    omp_set_num_threads(2);
    threads = 2;
  }

  /* Now the real parallel work:
   * Each thread will start immediately with the first chunk.
   */
  #pragma omp parallel shared(tids,maxiter)
  {  /* begin of parallel */
    double count;
    int tid;
    int j;

    tid = omp_get_thread_num ();

    #pragma omp for nowait schedule(guided)
    for(j = 0; j < CFSMAX_SIZE; ++j) {
      count = 0.;
      #pragma omp flush(maxiter)
      if (j > maxiter) {
        #pragma omp critical
        {
          maxiter = j;
        }
      }
      /*printf ("thread %d sleeping\n", tid);*/
      #pragma omp flush(maxiter,notout)
      while (notout && (count < MAX_TIME) && (maxiter == j)) {
        #pragma omp flush(maxiter,notout)
        my_sleep (SLEEPTIME);
        count += SLEEPTIME;
#ifdef VERBOSE
        printf(".");
#endif
      }
#ifdef VERBOSE
      if (count > 0.) printf(" waited %lf s\n", count);
#endif
      /*printf ("thread %d awake\n", tid);*/
      tids[j] = tid;
#ifdef VERBOSE
      printf("%d finished by %d\n",j,tid);
#endif
    } /* end of for */
    notout = 0;
    #pragma omp flush(maxiter,notout)
  } /* end of parallel */

  /*******************************************************
   * evaluation of the values              *
   *******************************************************/
  {
    int determined_chunksize = 1;
    int last_threadnr = tids[0];
    int global_chunknr = 0;
    int openwork = CFSMAX_SIZE;
    int expected_chunk_size;
    int* local_chunknr = (int*)malloc(threads * sizeof(int));
    double c = 1;

    for (i = 0; i < threads; i++)
      local_chunknr[i] = 0;

    tids[CFSMAX_SIZE] = -1;

    /*
     * determine the number of global chunks
     */
    // fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n");
    for(i = 1; i <= CFSMAX_SIZE; ++i) {
      if (last_threadnr==tids[i]) {
        determined_chunksize++;
      } else {
        /* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr,
           last_threadnr, local_chunknr[last_threadnr], m); */
        global_chunknr++;
        local_chunknr[last_threadnr]++;
        last_threadnr = tids[i];
        determined_chunksize = 1;
      }
    }
    /* now allocate the memory for saving the sizes of the global chunks */
    chunksizes = (int*)malloc(global_chunknr * sizeof(int));

    /*
    * Evaluate the sizes of the global chunks
    */
    global_chunknr = 0;
    determined_chunksize = 1;
    last_threadnr = tids[0];
    for (i = 1; i <= CFSMAX_SIZE; ++i) {
      /* If the threadnumber was the same as before increase the
       * detected chunksize for this chunk otherwise set the detected
       * chunksize again to one and save the number of the next
       * thread in last_threadnr.
       */
      if (last_threadnr == tids[i]) {
        determined_chunksize++;
      } else {
        chunksizes[global_chunknr] = determined_chunksize;
        global_chunknr++;
        local_chunknr[last_threadnr]++;
        last_threadnr = tids[i];
        determined_chunksize = 1;
      }
    }

#ifdef VERBOSE
    fprintf(stderr, "found\texpected\tconstant\n");
#endif

    /* identify the constant c for the exponential
       decrease of the chunksize */
    expected_chunk_size = openwork / threads;
    c = (double) chunksizes[0] / expected_chunk_size;

    for (i = 0; i < global_chunknr; i++) {
      /* calculate the new expected chunksize */
      if (expected_chunk_size > 1)
        expected_chunk_size = c * openwork / threads;
#ifdef VERBOSE
      fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i],
        expected_chunk_size, c * chunksizes[i]/expected_chunk_size);
#endif
      /* check if chunksize is inside the rounding errors */
      if (abs (chunksizes[i] - expected_chunk_size) >= 2) {
        result = 0;
#ifndef VERBOSE
        fprintf(stderr, "Chunksize differed from expected "
          "value: %d instead of %d\n", chunksizes[i],
          expected_chunk_size);
        return 0;
#endif
      } /* end if */

#ifndef VERBOSE
      if (expected_chunk_size - chunksizes[i] < 0)
        fprintf(stderr, "Chunksize did not decrease: %d"
          " instead of %d\n", chunksizes[i],expected_chunk_size);
#endif

      /* calculating the remaining amount of work */
      openwork -= chunksizes[i];
    }
  }
  return result;
}

int main()
{
  int i;
  int num_failed=0;

  for(i = 0; i < REPETITIONS; i++) {
    if(!test_omp_for_schedule_guided()) {
      num_failed++;
    }
  }
  return num_failed;
}