// RUN: %libomp-compile-and-run
/* Test for guided scheduling
* Ensure threads get chunks interleavely first
* Then judge the chunk sizes are decreasing to a stable value
* Modified by Chunhua Liao
* For example, 100 iteration on 2 threads, chunksize 7
* one line for each dispatch, 0/1 means thread id
* 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24
* 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 18
* 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
* 1 1 1 1 1 1 1 1 1 1 10
* 0 0 0 0 0 0 0 0 8
* 1 1 1 1 1 1 1 7
* 0 0 0 0 0 0 0 7
* 1 1 1 1 1 1 1 7
* 0 0 0 0 0 5
*/
#include <stdio.h>
#include <stdlib.h>
#include "omp_testsuite.h"
#include "omp_my_sleep.h"
#define CFSMAX_SIZE 1000
#define MAX_TIME 0.005
#ifdef SLEEPTIME
#undef SLEEPTIME
#define SLEEPTIME 0.0001
#endif
int test_omp_for_schedule_guided()
{
int * tids;
int * chunksizes;
int notout;
int maxiter;
int threads;
int i;
int result;
tids = (int *) malloc (sizeof (int) * (CFSMAX_SIZE + 1));
maxiter = 0;
result = 1;
notout = 1;
/* Testing if enough threads are available for this check. */
#pragma omp parallel
{
#pragma omp single
{
threads = omp_get_num_threads();
}
}
/* ensure there are at least two threads */
if (threads < 2) {
omp_set_num_threads(2);
threads = 2;
}
/* Now the real parallel work:
* Each thread will start immediately with the first chunk.
*/
#pragma omp parallel shared(tids,maxiter)
{ /* begin of parallel */
double count;
int tid;
int j;
tid = omp_get_thread_num ();
#pragma omp for nowait schedule(guided)
for(j = 0; j < CFSMAX_SIZE; ++j) {
count = 0.;
#pragma omp flush(maxiter)
if (j > maxiter) {
#pragma omp critical
{
maxiter = j;
}
}
/*printf ("thread %d sleeping\n", tid);*/
#pragma omp flush(maxiter,notout)
while (notout && (count < MAX_TIME) && (maxiter == j)) {
#pragma omp flush(maxiter,notout)
my_sleep (SLEEPTIME);
count += SLEEPTIME;
#ifdef VERBOSE
printf(".");
#endif
}
#ifdef VERBOSE
if (count > 0.) printf(" waited %lf s\n", count);
#endif
/*printf ("thread %d awake\n", tid);*/
tids[j] = tid;
#ifdef VERBOSE
printf("%d finished by %d\n",j,tid);
#endif
} /* end of for */
notout = 0;
#pragma omp flush(maxiter,notout)
} /* end of parallel */
/*******************************************************
* evaluation of the values *
*******************************************************/
{
int determined_chunksize = 1;
int last_threadnr = tids[0];
int global_chunknr = 0;
int openwork = CFSMAX_SIZE;
int expected_chunk_size;
int* local_chunknr = (int*)malloc(threads * sizeof(int));
double c = 1;
for (i = 0; i < threads; i++)
local_chunknr[i] = 0;
tids[CFSMAX_SIZE] = -1;
/*
* determine the number of global chunks
*/
// fprintf(stderr,"# global_chunknr thread local_chunknr chunksize\n");
for(i = 1; i <= CFSMAX_SIZE; ++i) {
if (last_threadnr==tids[i]) {
determined_chunksize++;
} else {
/* fprintf(stderr, "%d\t%d\t%d\t%d\n", global_chunknr,
last_threadnr, local_chunknr[last_threadnr], m); */
global_chunknr++;
local_chunknr[last_threadnr]++;
last_threadnr = tids[i];
determined_chunksize = 1;
}
}
/* now allocate the memory for saving the sizes of the global chunks */
chunksizes = (int*)malloc(global_chunknr * sizeof(int));
/*
* Evaluate the sizes of the global chunks
*/
global_chunknr = 0;
determined_chunksize = 1;
last_threadnr = tids[0];
for (i = 1; i <= CFSMAX_SIZE; ++i) {
/* If the threadnumber was the same as before increase the
* detected chunksize for this chunk otherwise set the detected
* chunksize again to one and save the number of the next
* thread in last_threadnr.
*/
if (last_threadnr == tids[i]) {
determined_chunksize++;
} else {
chunksizes[global_chunknr] = determined_chunksize;
global_chunknr++;
local_chunknr[last_threadnr]++;
last_threadnr = tids[i];
determined_chunksize = 1;
}
}
#ifdef VERBOSE
fprintf(stderr, "found\texpected\tconstant\n");
#endif
/* identify the constant c for the exponential
decrease of the chunksize */
expected_chunk_size = openwork / threads;
c = (double) chunksizes[0] / expected_chunk_size;
for (i = 0; i < global_chunknr; i++) {
/* calculate the new expected chunksize */
if (expected_chunk_size > 1)
expected_chunk_size = c * openwork / threads;
#ifdef VERBOSE
fprintf(stderr, "%8d\t%8d\t%lf\n", chunksizes[i],
expected_chunk_size, c * chunksizes[i]/expected_chunk_size);
#endif
/* check if chunksize is inside the rounding errors */
if (abs (chunksizes[i] - expected_chunk_size) >= 2) {
result = 0;
#ifndef VERBOSE
fprintf(stderr, "Chunksize differed from expected "
"value: %d instead of %d\n", chunksizes[i],
expected_chunk_size);
return 0;
#endif
} /* end if */
#ifndef VERBOSE
if (expected_chunk_size - chunksizes[i] < 0)
fprintf(stderr, "Chunksize did not decrease: %d"
" instead of %d\n", chunksizes[i],expected_chunk_size);
#endif
/* calculating the remaining amount of work */
openwork -= chunksizes[i];
}
}
return result;
}
int main()
{
int i;
int num_failed=0;
for(i = 0; i < REPETITIONS; i++) {
if(!test_omp_for_schedule_guided()) {
num_failed++;
}
}
return num_failed;
}