// llvm/offload/test/offloading/ompx_saxpy_mixed.c

// RUN: %libomptarget-compileopt-run-and-check-generic
//
// REQUIRES: gpu

#include <math.h>
#include <omp.h>
#include <ompx.h>
#include <stdio.h>
#include <stdlib.h>

// Computes Y = a * X + Y inside a `target teams` + `parallel` region that
// derives each element index from the ompx_* thread/block queries, then
// verifies on the host that every element of Y equals 4.0.
//
// An optional first command-line argument overrides the element count N.
// Returns 0 after printing the "Max error" line matched by the CHECK
// directive, or EXIT_FAILURE if N is not positive or the host buffers cannot
// be allocated.
int main(int argc, char **argv) {
  int N = 1 << 29;
  if (argc > 1)
    N = atoi(argv[1]);
  // A non-positive count would yield a zero team count and a bogus (wrapped)
  // allocation size, so reject it up front.
  if (N <= 0) {
    fprintf(stderr, "invalid element count: %i\n", N);
    return EXIT_FAILURE;
  }
  float a = 2.f;

  float *X = (float *)malloc(sizeof(*X) * (size_t)N);
  float *Y = (float *)malloc(sizeof(*Y) * (size_t)N);
  if (!X || !Y) {
    fprintf(stderr, "failed to allocate host buffers for %i elements\n", N);
    free(X);
    free(Y);
    return EXIT_FAILURE;
  }

  for (int i = 0; i < N; i++) {
    X[i] = 1.0f;
    Y[i] = 2.0f;
  }

  int TL = 256;
  // Ceiling division written so that it cannot overflow for N near INT_MAX.
  int NT = N / TL + (N % TL != 0);

#pragma omp target data map(to : X [0:N]) map(Y [0:N])
#pragma omp target teams num_teams(NT) thread_limit(TL)
  {
#pragma omp parallel
    {
      int tid = ompx_thread_id_x();
      int bid = ompx_block_id_x();
      int tdim = ompx_block_dim_x();
      // Flat element index; presumably these queries mirror CUDA's
      // threadIdx.x / blockIdx.x / blockDim.x -- confirm against ompx.h.
      int gid = tid + bid * tdim;
      // NT * TL may exceed N, so guard the tail.
      if (gid < N)
        Y[gid] = a * X[gid] + Y[gid];
    }
  }

  // Report the first element that deviates from the expected value 4.0.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++) {
    float err = fabsf(Y[i] - 4.0f);
    // The comparison is negated on purpose: a NaN in Y makes `err <= maxError`
    // false, so it is reported as a mismatch instead of being ignored.
    if (!(err <= maxError)) {
      maxError = err;
      printf("%i %f %f\n", i, maxError, Y[i]);
      break;
    }
  }
  // CHECK: Max error: 0.00
  printf("Max error: %f\n", maxError);

  free(X);
  free(Y);

  return 0;
}