llvm/openmp/tools/multiplex/ompt-multiplex.h

//===--- ompt-multiplex.h - header-only multiplexing of OMPT tools -- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file enables an OMPT tool to load another OMPT tool and
// automatically forwards OMPT event-callbacks to the nested tool.
//
// For details see openmp/tools/multiplex/README.md
//
//===----------------------------------------------------------------------===//

#ifndef OMPT_MULTIPLEX_H
#define OMPT_MULTIPLEX_H

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>
#include <errno.h>
#include <execinfo.h>
#include <inttypes.h>
#include <omp-tools.h>
#include <omp.h>
#include <stdio.h>
#include <string.h>

// Function pointers to OMPT runtime entry points (types from omp-tools.h).
// NOTE(review): presumably assigned during tool initialization through the
// runtime's lookup function; the assignment is not part of this section of
// the file -- confirm.
static ompt_set_callback_t ompt_multiplex_set_callback;
static ompt_get_task_info_t ompt_multiplex_get_task_info;
static ompt_get_thread_data_t ompt_multiplex_get_thread_data;
static ompt_get_parallel_info_t ompt_multiplex_get_parallel_info;

// If OMPT_MULTIPLEX_TOOL_NAME is defined, use the tool name as the prefix of
// the names of the TOOL_LIBRARIES and VERBOSE_INIT environment variables
// (the TOOL_LIBRARIES variable specifies the path of the tool to load).
// Only set these names if they are not already explicitly defined.
#ifdef OMPT_MULTIPLEX_TOOL_NAME
#ifndef CLIENT_TOOL_LIBRARIES_VAR
#define CLIENT_TOOL_LIBRARIES_VAR OMPT_MULTIPLEX_TOOL_NAME "_TOOL_LIBRARIES"
#endif
#ifndef CLIENT_TOOL_VERBOSE_INIT_VAR
#define CLIENT_TOOL_VERBOSE_INIT_VAR                                           \
  OMPT_MULTIPLEX_TOOL_NAME "_TOOL_VERBOSE_INIT"
#endif
#endif

// If CLIENT_TOOL_VERBOSE_INIT_VAR is still not defined, use the OMPT
// env var.
#ifndef CLIENT_TOOL_VERBOSE_INIT_VAR
#warning CLIENT_TOOL_VERBOSE_INIT_VAR redefined to OMP_TOOL_VERBOSE_INIT
#define CLIENT_TOOL_VERBOSE_INIT_VAR "OMP_TOOL_VERBOSE_INIT"
#endif

// contains name of the environment var in which the tool path is specified
#ifndef CLIENT_TOOL_LIBRARIES_VAR
#error CLIENT_TOOL_LIBRARIES_VAR should be defined before including of ompt-multiplex.h
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) &&                         \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
#error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA is set
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) &&                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
#error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA is set
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) &&                       \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
#error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA is set
#endif

#define OMPT_API_ROUTINE static

#ifndef OMPT_STR_MATCH
#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
#endif

// Printing helpers for an enabled verbose-init log (see setup_verbose_init).
// Both take printf-style arguments, write them to verbose_file when
// verbose_init is non-zero and do nothing otherwise.
// In the future a prefix could be added in the first define, the second define
// omits the prefix to allow for continued lines. Example: "PREFIX: Start
// tool... Success." instead of "PREFIX: Start tool... PREFIX: Success."
// The bodies are wrapped in do { } while (0) so that each use expands to
// exactly one statement: an `else` following the macro can no longer bind to
// the macro's internal `if`.
#define OMPT_VERBOSE_INIT_PRINT(...)                                           \
  do {                                                                         \
    if (verbose_init)                                                          \
      fprintf(verbose_file, __VA_ARGS__);                                      \
  } while (0)
#define OMPT_VERBOSE_INIT_CONTINUED_PRINT(...)                                 \
  do {                                                                         \
    if (verbose_init)                                                          \
      fprintf(verbose_file, __VA_ARGS__);                                      \
  } while (0)

// Destination stream and on/off flag of the verbose-init log. Both are
// written by setup_verbose_init() and read by the OMPT_VERBOSE_INIT_* macros.
static FILE *verbose_file;
static int verbose_init;

// Reads the environment variable named by CLIENT_TOOL_VERBOSE_INIT_VAR and
// configures the verbose-init log used by the OMPT_VERBOSE_INIT_* macros.
//
// Accepted values: disabled | stdout | stderr | <filename> (keywords are
// compared with OMPT_STR_MATCH). On return either verbose_init is 1 and
// verbose_file is a usable stream, or verbose_init is 0 and nothing is logged.
void setup_verbose_init() {
  const char *ompt_env_verbose_init = getenv(CLIENT_TOOL_VERBOSE_INIT_VAR);

  verbose_init = 0;
  // unset, empty or explicitly disabled -> no logging
  if (!ompt_env_verbose_init || !strcmp(ompt_env_verbose_init, "") ||
      OMPT_STR_MATCH(ompt_env_verbose_init, "disabled"))
    return;

  if (OMPT_STR_MATCH(ompt_env_verbose_init, "STDERR")) {
    verbose_file = stderr;
  } else if (OMPT_STR_MATCH(ompt_env_verbose_init, "STDOUT")) {
    verbose_file = stdout;
  } else {
    // The value names a file. Refuse it if OMP_TOOL_VERBOSE_INIT names the
    // same file (presumably the OpenMP runtime already writes to it).
    // OMP_TOOL_VERBOSE_INIT may be unset, so guard against a NULL getenv
    // result before comparing.
    const char *runtime_verbose_init = getenv("OMP_TOOL_VERBOSE_INIT");
    if (runtime_verbose_init &&
        OMPT_STR_MATCH(ompt_env_verbose_init, runtime_verbose_init)) {
      printf("Multiplex: Can not open file defined in OMP_TOOL_VERBOSE_INIT "
             "twice.");
      return;
    }
    verbose_file = fopen(ompt_env_verbose_init, "w");
    if (!verbose_file) {
      // Leave logging disabled instead of handing a NULL stream to fprintf.
      fprintf(stderr, "Multiplex: Can not open verbose init file %s: %s\n",
              ompt_env_verbose_init, strerror(errno));
      return;
    }
  }
  verbose_init = 1;
}

// X-macro table of the OMPT events handled by the multiplexer. For every
// event it expands `macro(event, callback_type, id);`, where `event` is the
// callback name without the `ompt_` prefix and `callback_type` is the
// function-pointer type of that callback. Each entry, including the last one,
// is followed by a `;`, so the expansion can be used directly as a list of
// member declarations or statements.
// NOTE(review): `id` is presumably the numeric value of the corresponding
// ompt_callbacks_t enumerator in omp-tools.h -- confirm when adding events.
#define OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(macro)                             \
  macro(callback_thread_begin, ompt_callback_thread_begin_t, 1);               \
  macro(callback_thread_end, ompt_callback_thread_end_t, 2);                   \
  macro(callback_parallel_begin, ompt_callback_parallel_begin_t, 3);           \
  macro(callback_parallel_end, ompt_callback_parallel_end_t, 4);               \
  macro(callback_task_create, ompt_callback_task_create_t, 5);                 \
  macro(callback_task_schedule, ompt_callback_task_schedule_t, 6);             \
  macro(callback_implicit_task, ompt_callback_implicit_task_t, 7);             \
  macro(callback_target, ompt_callback_target_t, 8);                           \
  macro(callback_target_data_op, ompt_callback_target_data_op_t, 9);           \
  macro(callback_target_submit, ompt_callback_target_submit_t, 10);            \
  macro(callback_control_tool, ompt_callback_control_tool_t, 11);              \
  macro(callback_device_initialize, ompt_callback_device_initialize_t, 12);    \
  macro(callback_device_finalize, ompt_callback_device_finalize_t, 13);        \
  macro(callback_device_load, ompt_callback_device_load_t, 14);                \
  macro(callback_device_unload, ompt_callback_device_unload_t, 15);            \
  macro(callback_sync_region_wait, ompt_callback_sync_region_t, 16);           \
  macro(callback_mutex_released, ompt_callback_mutex_t, 17);                   \
  macro(callback_dependences, ompt_callback_dependences_t, 18);                \
  macro(callback_task_dependence, ompt_callback_task_dependence_t, 19);        \
  macro(callback_work, ompt_callback_work_t, 20);                              \
  macro(callback_masked, ompt_callback_masked_t, 21);                          \
  macro(callback_target_map, ompt_callback_target_map_t, 22);                  \
  macro(callback_sync_region, ompt_callback_sync_region_t, 23);                \
  macro(callback_lock_init, ompt_callback_mutex_acquire_t, 24);                \
  macro(callback_lock_destroy, ompt_callback_mutex_t, 25);                     \
  macro(callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26);            \
  macro(callback_mutex_acquired, ompt_callback_mutex_t, 27);                   \
  macro(callback_nest_lock, ompt_callback_nest_lock_t, 28);                    \
  macro(callback_flush, ompt_callback_flush_t, 29);                            \
  macro(callback_cancel, ompt_callback_cancel_t, 30);                          \
  macro(callback_reduction, ompt_callback_sync_region_t, 31);                  \
  macro(callback_dispatch, ompt_callback_dispatch_t, 32);

// Table of callback pointers of one tool: for every event listed in
// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT there is a member `ompt_<event>` of the
// callback type given for that event.
typedef struct ompt_multiplex_callbacks_s {
#define ompt_event_macro(name, type, id) type ompt_##name
  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_multiplex_callbacks_t;

// One `int` member `ompt_<event>` for every event listed in
// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT.
// NOTE(review): presumably holds the status reported when the callback for
// that event was registered -- confirm against the code that fills it.
typedef struct ompt_multiplex_callback_implementation_status_s {
#define ompt_event_macro(name, type, id) int ompt_##name
  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_multiplex_callback_implementation_status_t;

// Global state of the multiplexer.
// NOTE(review): the *_fns pointers and the lookup function are presumably set
// up while the tools are started; that code is not in this section -- confirm.
ompt_start_tool_result_t *ompt_multiplex_own_fns = NULL;
ompt_start_tool_result_t *ompt_multiplex_client_fns = NULL;
ompt_function_lookup_t ompt_multiplex_lookup_function;

// Callback tables read by the ompt_multiplex_callback_* wrappers: one for the
// tool including this header ("own") and one for the nested tool ("client").
ompt_multiplex_callbacks_t ompt_multiplex_own_callbacks;
ompt_multiplex_callbacks_t ompt_multiplex_client_callbacks;

ompt_multiplex_callback_implementation_status_t
    ompt_multiplex_implementation_status;

// Pair of tool data words stored behind a single runtime-provided
// ompt_data_t: one slot for the own tool and one for the client tool.
struct ompt_multiplex_data_pair_s {
  ompt_data_t own_data;    // slot handed to the own tool
  ompt_data_t client_data; // slot handed to the client tool
};
typedef struct ompt_multiplex_data_pair_s ompt_multiplex_data_pair_t;

#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||                  \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||                \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
// Allocates a data pair, stores its address in data_pointer->ptr and returns
// it with both slots' ptr members set to NULL. Prints "Malloc ERROR" and
// terminates the process with exit(-1) if the allocation fails.
static ompt_multiplex_data_pair_t *
ompt_multiplex_allocate_data_pair(ompt_data_t *data_pointer) {
  ompt_multiplex_data_pair_t *pair =
      (ompt_multiplex_data_pair_t *)malloc(sizeof(ompt_multiplex_data_pair_t));
  data_pointer->ptr = pair;
  if (pair == NULL) {
    printf("Malloc ERROR\n");
    exit(-1);
  }
  pair->own_data.ptr = NULL;
  pair->client_data.ptr = NULL;
  return pair;
}

// Releases the data pair referenced by data_pointer->ptr. The pointer stored
// in *data_pointer is left unchanged.
static void ompt_multiplex_free_data_pair(ompt_data_t *data_pointer) {
  free(data_pointer->ptr);
}

// Returns the own tool's slot of the data pair stored in data->ptr, or NULL
// if data is NULL or no pair is attached.
static ompt_data_t *ompt_multiplex_get_own_ompt_data(ompt_data_t *data) {
  if (data == NULL || data->ptr == NULL)
    return NULL;
  return &((ompt_multiplex_data_pair_t *)data->ptr)->own_data;
}

// Returns the client tool's slot of the data pair stored in data->ptr, or
// NULL if data is NULL or no pair is attached.
static ompt_data_t *ompt_multiplex_get_client_ompt_data(ompt_data_t *data) {
  if (data == NULL || data->ptr == NULL)
    return NULL;
  return &((ompt_multiplex_data_pair_t *)data->ptr)->client_data;
}
#endif //! defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||
       //! !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||
       //! !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)

// Maps runtime thread data to the data word passed to the own tool.
static ompt_data_t *ompt_multiplex_get_own_thread_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  // Custom client storage: the runtime's data word is passed on unchanged.
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}

// Maps runtime parallel data to the data word passed to the own tool.
static ompt_data_t *ompt_multiplex_get_own_parallel_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  // Custom client storage: the runtime's data word is passed on unchanged.
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}

// Maps runtime task data to the data word passed to the own tool.
static ompt_data_t *ompt_multiplex_get_own_task_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  // Custom client storage: the runtime's data word is passed on unchanged.
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}

// Maps runtime thread data to the data word passed to the client tool.
static ompt_data_t *ompt_multiplex_get_client_thread_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  // The including tool supplies its own lookup for the client's data.
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}

// Maps runtime parallel data to the data word passed to the client tool.
static ompt_data_t *ompt_multiplex_get_client_parallel_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  // The including tool supplies its own lookup for the client's data.
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}

// Maps runtime task data to the data word passed to the client tool.
static ompt_data_t *ompt_multiplex_get_client_task_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  // The including tool supplies its own lookup for the client's data.
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}

// Forwards a mutex-acquire event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_mutex_acquire(ompt_mutex_t kind,
                                                  unsigned int hint,
                                                  unsigned int impl,
                                                  ompt_wait_id_t wait_id,
                                                  const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire(
        kind, hint, impl, wait_id, codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire(
        kind, hint, impl, wait_id, codeptr_ra);
}

// Forwards a mutex-acquired event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_mutex_acquired(ompt_mutex_t kind,
                                                   ompt_wait_id_t wait_id,
                                                   const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired(kind, wait_id,
                                                              codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired(kind, wait_id,
                                                                 codeptr_ra);
}

// Forwards a mutex-released event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_mutex_released(ompt_mutex_t kind,
                                                   ompt_wait_id_t wait_id,
                                                   const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_released != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_mutex_released(kind, wait_id,
                                                              codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_released != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_mutex_released(kind, wait_id,
                                                                 codeptr_ra);
}

// Forwards a nest-lock event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_nest_lock(ompt_scope_endpoint_t endpoint,
                                              ompt_wait_id_t wait_id,
                                              const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_nest_lock != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_nest_lock(endpoint, wait_id,
                                                         codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_nest_lock != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_nest_lock(endpoint, wait_id,
                                                            codeptr_ra);
}

// Forwards a sync-region event to the own tool and then to the client tool.
// Each tool receives its own view of the parallel and task data; a tool whose
// callback pointer is NULL is skipped.
static void ompt_multiplex_callback_sync_region(ompt_sync_region_t kind,
                                                ompt_scope_endpoint_t endpoint,
                                                ompt_data_t *parallel_data,
                                                ompt_data_t *task_data,
                                                const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_sync_region != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_sync_region(
        kind, endpoint, own_parallel, own_task, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_sync_region != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_sync_region(
        kind, endpoint, client_parallel, client_task, codeptr_ra);
  }
}

// Forwards a sync-region-wait event to the own tool and then to the client
// tool. Each tool receives its own view of the parallel and task data; a tool
// whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_sync_region_wait(
    ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
    ompt_data_t *parallel_data, ompt_data_t *task_data,
    const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait(
        kind, endpoint, own_parallel, own_task, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait(
        kind, endpoint, client_parallel, client_task, codeptr_ra);
  }
}

// Forwards a flush event to the own tool and then to the client tool, each
// with its own view of the thread data. A tool whose callback pointer is NULL
// is skipped.
static void ompt_multiplex_callback_flush(ompt_data_t *thread_data,
                                          const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_flush != NULL) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    ompt_multiplex_own_callbacks.ompt_callback_flush(own_thread, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_flush != NULL) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    ompt_multiplex_client_callbacks.ompt_callback_flush(client_thread,
                                                        codeptr_ra);
  }
}

// Forwards a cancel event to the own tool and then to the client tool, each
// with its own view of the task data. A tool whose callback pointer is NULL
// is skipped.
static void ompt_multiplex_callback_cancel(ompt_data_t *task_data, int flags,
                                           const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_cancel != NULL) {
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_cancel(own_task, flags,
                                                      codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_cancel != NULL) {
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_cancel(client_task, flags,
                                                         codeptr_ra);
  }
}

// Forwards an implicit-task event to the own and the client tool and manages
// the data pairs attached to the task (and, for initial tasks, to the
// parallel region).
//
// endpoint == ompt_scope_begin: the data pairs are allocated first, then the
// own tool and afterwards the client tool are called.
// Any other endpoint: the tools are called in an order that depends on the
// OMPT_MULTIPLEX_CUSTOM_* defines, then the task data is released.
static void ompt_multiplex_callback_implicit_task(
    ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data,
    ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num,
    int flags) {
  if (endpoint == ompt_scope_begin) {
    // Default storage: attach a fresh own/client pair to the task data.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    ompt_multiplex_allocate_data_pair(task_data);
#endif
    // For initial tasks a pair is also attached to the parallel data.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
    if (flags & ompt_task_initial)
      ompt_multiplex_allocate_data_pair(parallel_data);
#endif
    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
          flags);
    }
    if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) {
      ompt_multiplex_client_callbacks.ompt_callback_implicit_task(
          endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
          ompt_multiplex_get_client_task_data(task_data), team_size, thread_num,
          flags);
    }
  } else {
// defines to make sure, callbacks are called in correct order depending on
// defines set by the user
// Own tool BEFORE the client tool: default storage, or custom storage with a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
          flags);
    }
#endif

    if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) {
      ompt_multiplex_client_callbacks.ompt_callback_implicit_task(
          endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
          ompt_multiplex_get_client_task_data(task_data), team_size, thread_num,
          flags);
    }

// Own tool AFTER the client tool: custom storage without a custom delete
// macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
          flags);
    }
#endif

    // Default storage: release the pair attached to the task data.
    // NOTE(review): the pair attached to parallel_data for initial tasks in
    // the begin branch has no matching free in this function -- confirm
    // whether it is released elsewhere.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    ompt_multiplex_free_data_pair(task_data);
#endif

    // Custom storage: let the user-supplied macros release their data.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
    if (flags & ompt_task_initial)
      OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
#endif
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(task_data);
#endif
  }
}

// Forwards a lock-init event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_lock_init(ompt_mutex_t kind,
                                              unsigned int hint,
                                              unsigned int impl,
                                              ompt_wait_id_t wait_id,
                                              const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_lock_init != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_lock_init(kind, hint, impl,
                                                         wait_id, codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_lock_init != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_lock_init(
        kind, hint, impl, wait_id, codeptr_ra);
}

// Forwards a lock-destroy event to the own tool and then to the client tool.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_lock_destroy(ompt_mutex_t kind,
                                                 ompt_wait_id_t wait_id,
                                                 const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_lock_destroy != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_lock_destroy(kind, wait_id,
                                                            codeptr_ra);

  if (ompt_multiplex_client_callbacks.ompt_callback_lock_destroy != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_lock_destroy(kind, wait_id,
                                                               codeptr_ra);
}

// Forwards a work event to the own tool and then to the client tool. Each
// tool receives its own view of the parallel and task data; a tool whose
// callback pointer is NULL is skipped.
static void ompt_multiplex_callback_work(ompt_work_t wstype,
                                         ompt_scope_endpoint_t endpoint,
                                         ompt_data_t *parallel_data,
                                         ompt_data_t *task_data, uint64_t count,
                                         const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_work != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_work(
        wstype, endpoint, own_parallel, own_task, count, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_work != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_work(
        wstype, endpoint, client_parallel, client_task, count, codeptr_ra);
  }
}

// Forwards a masked event to the own tool and then to the client tool. Each
// tool receives its own view of the parallel and task data; a tool whose
// callback pointer is NULL is skipped.
static void ompt_multiplex_callback_masked(ompt_scope_endpoint_t endpoint,
                                           ompt_data_t *parallel_data,
                                           ompt_data_t *task_data,
                                           const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_masked != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_masked(endpoint, own_parallel,
                                                      own_task, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_masked != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_masked(
        endpoint, client_parallel, client_task, codeptr_ra);
  }
}

// Forwards a parallel-begin event to the own tool and then to the client
// tool. With the default data storage a fresh own/client pair is attached to
// parallel_data before any tool is called.
static void ompt_multiplex_callback_parallel_begin(
    ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame,
    ompt_data_t *parallel_data, uint32_t requested_team_size, int flag,
    const void *codeptr_ra) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  ompt_multiplex_allocate_data_pair(parallel_data);
#endif

  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_begin != NULL) {
    ompt_data_t *own_parent =
        ompt_multiplex_get_own_task_data(parent_task_data);
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_multiplex_own_callbacks.ompt_callback_parallel_begin(
        own_parent, parent_task_frame, own_parallel, requested_team_size, flag,
        codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_parallel_begin != NULL) {
    ompt_data_t *client_parent =
        ompt_multiplex_get_client_task_data(parent_task_data);
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_multiplex_client_callbacks.ompt_callback_parallel_begin(
        client_parent, parent_task_frame, client_parallel, requested_team_size,
        flag, codeptr_ra);
  }
}

// Forwards a parallel-end event to the own and the client tool and releases
// the data attached to parallel_data afterwards. Whether the own tool runs
// before or after the client tool depends on the OMPT_MULTIPLEX_CUSTOM_*
// defines.
static void ompt_multiplex_callback_parallel_end(ompt_data_t *parallel_data,
                                                 ompt_data_t *task_data,
                                                 int flag,
                                                 const void *codeptr_ra) {
// defines to make sure, callbacks are called in correct order depending on
// defines set by the user
// Own tool BEFORE the client tool: default storage, or custom storage with a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) ||                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) {
    ompt_multiplex_own_callbacks.ompt_callback_parallel_end(
        ompt_multiplex_get_own_parallel_data(parallel_data),
        ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra);
  }
#endif

  if (ompt_multiplex_client_callbacks.ompt_callback_parallel_end) {
    ompt_multiplex_client_callbacks.ompt_callback_parallel_end(
        ompt_multiplex_get_client_parallel_data(parallel_data),
        ompt_multiplex_get_client_task_data(task_data), flag, codeptr_ra);
  }

// Own tool AFTER the client tool: custom storage without a custom delete
// macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) &&                 \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) {
    ompt_multiplex_own_callbacks.ompt_callback_parallel_end(
        ompt_multiplex_get_own_parallel_data(parallel_data),
        ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra);
  }
#endif

  // Default storage: release the pair attached to the parallel data.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  ompt_multiplex_free_data_pair(parallel_data);
#endif

  // Custom storage: let the user-supplied macro release its data.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
  OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
#endif
}

// Forwards a task-create event to the own tool and then to the client tool.
//
// With the default data storage a fresh own/client pair is attached to
// new_task_data before any tool is called. If `type` has ompt_task_initial
// set, a pair is also attached to the data of the current parallel region
// (level 0), which is queried through ompt_multiplex_get_parallel_info.
// A tool whose callback pointer is NULL is skipped.
static void ompt_multiplex_callback_task_create(
    ompt_data_t *parent_task_data, const ompt_frame_t *parent_frame,
    ompt_data_t *new_task_data, int type, int has_dependences,
    const void *codeptr_ra) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  ompt_multiplex_allocate_data_pair(new_task_data);
#endif

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  if (type & ompt_task_initial) {
    // The query may fail or leave the out-parameter untouched; only attach a
    // pair when the runtime actually handed back a parallel data word.
    ompt_data_t *parallel_data = NULL;
    if (ompt_multiplex_get_parallel_info(0, &parallel_data, NULL) &&
        parallel_data)
      ompt_multiplex_allocate_data_pair(parallel_data);
  }
#endif

  if (ompt_multiplex_own_callbacks.ompt_callback_task_create) {
    ompt_multiplex_own_callbacks.ompt_callback_task_create(
        ompt_multiplex_get_own_task_data(parent_task_data), parent_frame,
        ompt_multiplex_get_own_task_data(new_task_data), type, has_dependences,
        codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_task_create) {
    ompt_multiplex_client_callbacks.ompt_callback_task_create(
        ompt_multiplex_get_client_task_data(parent_task_data), parent_frame,
        ompt_multiplex_get_client_task_data(new_task_data), type,
        has_dependences, codeptr_ra);
  }
}

// Forwards a task-schedule event to the own and the client tool.
//
// If prior_task_status is not ompt_task_complete, the own tool is called
// first and the client tool second. If it is ompt_task_complete, the call
// order depends on the OMPT_MULTIPLEX_CUSTOM_* defines and the data attached
// to first_task_data is released after both tools have been called.
static void
ompt_multiplex_callback_task_schedule(ompt_data_t *first_task_data,
                                      ompt_task_status_t prior_task_status,
                                      ompt_data_t *second_task_data) {
  if (prior_task_status != ompt_task_complete) {
    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
          ompt_multiplex_get_own_task_data(second_task_data));
    }
    if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
      ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
          ompt_multiplex_get_client_task_data(first_task_data),
          prior_task_status,
          ompt_multiplex_get_client_task_data(second_task_data));
    }
  } else {
// defines to make sure, callbacks are called in correct order depending on
// defines set by the user
// Own tool BEFORE the client tool: default storage, or custom storage with a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
          ompt_multiplex_get_own_task_data(second_task_data));
    }
#endif

    if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
      ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
          ompt_multiplex_get_client_task_data(first_task_data),
          prior_task_status,
          ompt_multiplex_get_client_task_data(second_task_data));
    }

// Own tool AFTER the client tool: custom storage without a custom delete
// macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
          ompt_multiplex_get_own_task_data(second_task_data));
    }
#endif

    // Default storage: release the pair attached to the completed task.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    ompt_multiplex_free_data_pair(first_task_data);
#endif

    // Custom storage: let the user-supplied macro release its data.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(first_task_data);
#endif
  }
}

// Forwards a dependences event to the own tool and then to the client tool,
// each with its own view of the task data. A tool whose callback pointer is
// NULL is skipped.
static void ompt_multiplex_callback_dependences(ompt_data_t *task_data,
                                                const ompt_dependence_t *deps,
                                                int ndeps) {
  if (ompt_multiplex_own_callbacks.ompt_callback_dependences != NULL) {
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_dependences(own_task, deps,
                                                           ndeps);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_dependences != NULL) {
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_dependences(client_task, deps,
                                                              ndeps);
  }
}

// Forwards a task-dependence event to the own tool and then to the client
// tool, each with its own view of both task data words. A tool whose callback
// pointer is NULL is skipped.
static void
ompt_multiplex_callback_task_dependence(ompt_data_t *first_task_data,
                                        ompt_data_t *second_task_data) {
  if (ompt_multiplex_own_callbacks.ompt_callback_task_dependence != NULL) {
    ompt_data_t *own_first = ompt_multiplex_get_own_task_data(first_task_data);
    ompt_data_t *own_second =
        ompt_multiplex_get_own_task_data(second_task_data);
    ompt_multiplex_own_callbacks.ompt_callback_task_dependence(own_first,
                                                               own_second);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_task_dependence != NULL) {
    ompt_data_t *client_first =
        ompt_multiplex_get_client_task_data(first_task_data);
    ompt_data_t *client_second =
        ompt_multiplex_get_client_task_data(second_task_data);
    ompt_multiplex_client_callbacks.ompt_callback_task_dependence(
        client_first, client_second);
  }
}

// Forwards a thread-begin event to the own tool and then to the client tool.
// With the default data storage a fresh own/client pair is attached to
// thread_data before any tool is called.
static void ompt_multiplex_callback_thread_begin(ompt_thread_t thread_type,
                                                 ompt_data_t *thread_data) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  ompt_multiplex_allocate_data_pair(thread_data);
#endif

  if (ompt_multiplex_own_callbacks.ompt_callback_thread_begin != NULL) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    ompt_multiplex_own_callbacks.ompt_callback_thread_begin(thread_type,
                                                            own_thread);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_thread_begin != NULL) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    ompt_multiplex_client_callbacks.ompt_callback_thread_begin(thread_type,
                                                               client_thread);
  }
}

// Forwards a thread-end event to the own and the client tool and releases the
// data attached to thread_data afterwards. Whether the own tool runs before
// or after the client tool depends on the OMPT_MULTIPLEX_CUSTOM_* defines.
static void ompt_multiplex_callback_thread_end(ompt_data_t *thread_data) {
// defines to make sure, callbacks are called in correct order depending on
// defines set by the user
// Own tool BEFORE the client tool: default storage, or custom storage with a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) ||                       \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
    ompt_multiplex_own_callbacks.ompt_callback_thread_end(
        ompt_multiplex_get_own_thread_data(thread_data));
  }
#endif

  if (ompt_multiplex_client_callbacks.ompt_callback_thread_end) {
    ompt_multiplex_client_callbacks.ompt_callback_thread_end(
        ompt_multiplex_get_client_thread_data(thread_data));
  }

// Own tool AFTER the client tool: custom storage without a custom delete
// macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) &&                   \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
    ompt_multiplex_own_callbacks.ompt_callback_thread_end(
        ompt_multiplex_get_own_thread_data(thread_data));
  }
#endif

  // Default storage: release the pair attached to the thread data.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  ompt_multiplex_free_data_pair(thread_data);
#endif

  // Custom storage: let the user-supplied macro release its data.
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
  OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA(thread_data);
#endif
}

// Multiplexed control-tool callback.
// Passes the request unchanged to the own tool and then to the client tool.
// A tool without a registered callback contributes the value 0. The smaller
// of the two results is returned.
static int ompt_multiplex_callback_control_tool(uint64_t command,
                                                uint64_t modifier, void *arg,
                                                const void *codeptr_ra) {
  int own_result = 0;
  int client_result = 0;

  if (ompt_multiplex_own_callbacks.ompt_callback_control_tool != NULL) {
    own_result = ompt_multiplex_own_callbacks.ompt_callback_control_tool(
        command, modifier, arg, codeptr_ra);
  }

  if (ompt_multiplex_client_callbacks.ompt_callback_control_tool != NULL) {
    client_result = ompt_multiplex_client_callbacks.ompt_callback_control_tool(
        command, modifier, arg, codeptr_ra);
  }

  // report the minimum of both results
  if (own_result < client_result) {
    return own_result;
  }
  return client_result;
}

// Multiplexed target callback.
// Delivers the event to the own tool and then to the client tool. task_data
// is translated to the respective tool's view; all other arguments are passed
// through unchanged. Unregistered callbacks are skipped.
static void ompt_multiplex_callback_target(
    ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num,
    ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_target != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_target(
        kind, endpoint, device_num,
        ompt_multiplex_get_own_task_data(task_data),
        target_id, codeptr_ra);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_target != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_target(
        kind, endpoint, device_num,
        ompt_multiplex_get_client_task_data(task_data),
        target_id, codeptr_ra);
  }
}

// Multiplexed target-data-op callback.
// All arguments are forwarded verbatim, first to the own tool and then to the
// client tool; unregistered callbacks are skipped.
static void ompt_multiplex_callback_target_data_op(
    ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype,
    void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
    size_t bytes, const void *codeptr_ra) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_target_data_op != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_target_data_op(
        target_id, host_op_id, optype,
        src_addr, src_device_num,
        dest_addr, dest_device_num,
        bytes, codeptr_ra);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_target_data_op != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_target_data_op(
        target_id, host_op_id, optype,
        src_addr, src_device_num,
        dest_addr, dest_device_num,
        bytes, codeptr_ra);
  }
}

// Multiplexed target-submit callback.
// All arguments are forwarded verbatim, first to the own tool and then to the
// client tool; unregistered callbacks are skipped.
static void
ompt_multiplex_callback_target_submit(ompt_id_t target_id, ompt_id_t host_op_id,
                                      unsigned int requested_num_teams) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_target_submit != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_target_submit(
        target_id,
        host_op_id,
        requested_num_teams);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_target_submit != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_target_submit(
        target_id,
        host_op_id,
        requested_num_teams);
  }
}

// Multiplexed device-initialize callback.
// All arguments (including the device lookup function) are forwarded
// verbatim, first to the own tool and then to the client tool; unregistered
// callbacks are skipped.
static void ompt_multiplex_callback_device_initialize(
    int device_num, const char *type, ompt_device_t *device,
    ompt_function_lookup_t lookup, const char *documentation) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_device_initialize != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_device_initialize(
        device_num, type, device,
        lookup, documentation);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_device_initialize !=
      NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_device_initialize(
        device_num, type, device,
        lookup, documentation);
  }
}

// Multiplexed device-finalize callback.
// device_num is forwarded to the own tool and then to the client tool;
// unregistered callbacks are skipped.
static void ompt_multiplex_callback_device_finalize(int device_num) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_device_finalize != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_device_finalize(device_num);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_device_finalize != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_device_finalize(device_num);
  }
}

// Multiplexed device-load callback.
// All arguments are forwarded verbatim, first to the own tool and then to the
// client tool; unregistered callbacks are skipped.
static void
ompt_multiplex_callback_device_load(int device_num, const char *filename,
                                    int64_t offset_in_file, void *vma_in_file,
                                    size_t bytes, void *host_addr,
                                    void *device_addr, uint64_t module_id) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_device_load != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_device_load(
        device_num, filename,
        offset_in_file, vma_in_file, bytes,
        host_addr, device_addr, module_id);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_device_load != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_device_load(
        device_num, filename,
        offset_in_file, vma_in_file, bytes,
        host_addr, device_addr, module_id);
  }
}

// Multiplexed device-unload callback.
// Both arguments are forwarded verbatim, first to the own tool and then to
// the client tool; unregistered callbacks are skipped.
static void ompt_multiplex_callback_device_unload(int device_num,
                                                  uint64_t module_id) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_device_unload != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_device_unload(
        device_num, module_id);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_device_unload != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_device_unload(
        device_num, module_id);
  }
}

// Multiplexed target-map callback.
// All arguments (the array pointers are passed as-is, not copied) are
// forwarded first to the own tool and then to the client tool; unregistered
// callbacks are skipped.
static void
ompt_multiplex_callback_target_map(ompt_id_t target_id, unsigned int nitems,
                                   void **host_addr, void **device_addr,
                                   size_t *bytes, unsigned int *mapping_flags,
                                   const void *codeptr_ra) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_target_map != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_target_map(
        target_id, nitems,
        host_addr, device_addr,
        bytes, mapping_flags, codeptr_ra);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_target_map != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_target_map(
        target_id, nitems,
        host_addr, device_addr,
        bytes, mapping_flags, codeptr_ra);
  }
}

// Multiplexed reduction callback.
// Delivers the event to the own tool and then to the client tool.
// parallel_data and task_data are translated to the respective tool's view;
// the remaining arguments are passed through unchanged. Unregistered
// callbacks are skipped.
static void ompt_multiplex_callback_reduction(ompt_sync_region_t kind,
                                              ompt_scope_endpoint_t endpoint,
                                              ompt_data_t *parallel_data,
                                              ompt_data_t *task_data,
                                              const void *codeptr_ra) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_reduction != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_reduction(
        kind, endpoint,
        ompt_multiplex_get_own_parallel_data(parallel_data),
        ompt_multiplex_get_own_task_data(task_data),
        codeptr_ra);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_reduction != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_reduction(
        kind, endpoint,
        ompt_multiplex_get_client_parallel_data(parallel_data),
        ompt_multiplex_get_client_task_data(task_data),
        codeptr_ra);
  }
}

// Multiplexed dispatch callback.
// Delivers the event to the own tool and then to the client tool.
// parallel_data and task_data are translated to the respective tool's view;
// kind and instance are passed through unchanged. Unregistered callbacks are
// skipped.
static void ompt_multiplex_callback_dispatch(ompt_data_t *parallel_data,
                                             ompt_data_t *task_data,
                                             ompt_dispatch_t kind,
                                             ompt_data_t instance) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_dispatch != NULL) {
    ompt_multiplex_own_callbacks.ompt_callback_dispatch(
        ompt_multiplex_get_own_parallel_data(parallel_data),
        ompt_multiplex_get_own_task_data(task_data),
        kind, instance);
  }

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_dispatch != NULL) {
    ompt_multiplex_client_callbacks.ompt_callback_dispatch(
        ompt_multiplex_get_client_parallel_data(parallel_data),
        ompt_multiplex_get_client_task_data(task_data),
        kind, instance);
  }
}

// runtime entry functions

// ompt_get_task_info as handed to the own tool.
// Calls the runtime's entry point and then replaces the returned task and
// parallel data pointers with the own tool's view obtained through
// ompt_multiplex_get_own_ompt_data(). A replacement is compiled out when the
// matching OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_{TASK,PARALLEL}_DATA macro is
// defined. NULL output arguments are left alone.
// Returns the runtime's return value unchanged.
int ompt_multiplex_own_get_task_info(int ancestor_level, int *type,
                                     ompt_data_t **task_data,
                                     ompt_frame_t **task_frame,
                                     ompt_data_t **parallel_data,
                                     int *thread_num) {
  const int status = ompt_multiplex_get_task_info(
      ancestor_level, type, task_data, task_frame, parallel_data, thread_num);

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  if (task_data != NULL) {
    *task_data = ompt_multiplex_get_own_ompt_data(*task_data);
  }
#endif

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  if (parallel_data != NULL) {
    *parallel_data = ompt_multiplex_get_own_ompt_data(*parallel_data);
  }
#endif

  return status;
}

// ompt_get_task_info as handed to the client tool.
// Calls the runtime's entry point and then replaces the returned task and
// parallel data pointers with the client tool's view: through
// ompt_multiplex_get_client_ompt_data() by default, or through the
// user-supplied OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_{TASK,PARALLEL}_DATA macro
// when that is defined. NULL output arguments are left alone.
// Returns the runtime's return value unchanged.
int ompt_multiplex_client_get_task_info(int ancestor_level, int *type,
                                        ompt_data_t **task_data,
                                        ompt_frame_t **task_frame,
                                        ompt_data_t **parallel_data,
                                        int *thread_num) {
  const int status = ompt_multiplex_get_task_info(
      ancestor_level, type, task_data, task_frame, parallel_data, thread_num);

  if (task_data != NULL) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    *task_data = ompt_multiplex_get_client_ompt_data(*task_data);
#else
    *task_data = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(*task_data);
#endif
  }

  if (parallel_data != NULL) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
#else
    *parallel_data =
        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
#endif
  }

  return status;
}

// ompt_get_thread_data as handed to the own tool.
// By default the runtime's thread data is mapped to the own tool's view via
// ompt_multiplex_get_own_ompt_data(); with
// OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA defined the runtime's pointer
// is returned as is.
ompt_data_t *ompt_multiplex_own_get_thread_data() {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  return ompt_multiplex_get_own_ompt_data(ompt_multiplex_get_thread_data());
#else
  return ompt_multiplex_get_thread_data();
#endif
}

// ompt_get_thread_data as handed to the client tool.
// The runtime's thread data is mapped to the client tool's view: via
// ompt_multiplex_get_client_ompt_data() by default, or via the user-supplied
// OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA macro when that is defined.
ompt_data_t *ompt_multiplex_client_get_thread_data() {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  return ompt_multiplex_get_client_ompt_data(ompt_multiplex_get_thread_data());
#else
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(
      ompt_multiplex_get_thread_data());
#endif
}

// ompt_get_parallel_info as handed to the own tool.
// Calls the runtime's entry point and, if parallel_data is non-NULL, replaces
// the returned pointer with the own tool's view obtained through
// ompt_multiplex_get_own_parallel_data().
// Returns the runtime's return value unchanged.
int ompt_multiplex_own_get_parallel_info(int ancestor_level,
                                         ompt_data_t **parallel_data,
                                         int *team_size) {
  const int status =
      ompt_multiplex_get_parallel_info(ancestor_level, parallel_data, team_size);

  if (parallel_data != NULL) {
    *parallel_data = ompt_multiplex_get_own_parallel_data(*parallel_data);
  }

  return status;
}

// ompt_get_parallel_info as handed to the client tool.
// Calls the runtime's entry point and, if parallel_data is non-NULL, replaces
// the returned pointer with the client tool's view: through
// ompt_multiplex_get_client_ompt_data() by default, or through the
// user-supplied OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA macro when
// that is defined.
// Returns the runtime's return value unchanged.
int ompt_multiplex_client_get_parallel_info(int ancestor_level,
                                            ompt_data_t **parallel_data,
                                            int *team_size) {
  const int status =
      ompt_multiplex_get_parallel_info(ancestor_level, parallel_data, team_size);

  if (parallel_data != NULL) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
#else
    *parallel_data =
        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
#endif
  }

  return status;
}

// ompt_set_callback as handed to the own tool.
//
// For the event selected by `which`, the callback is stored in
// ompt_multiplex_own_callbacks. The runtime itself only ever sees the
// multiplexing wrapper ompt_multiplex_<event>: it is registered through
// ompt_multiplex_set_callback the first time either tool asks for the event
// (status still -1, the value written by ompt_multiplex_initialize), and the
// runtime's answer is cached in ompt_multiplex_implementation_status and
// returned. Later requests for the same event return the cached status.
// Events not covered by OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT yield
// ompt_set_error.
OMPT_API_ROUTINE int ompt_multiplex_own_set_callback(ompt_callbacks_t which,
                                                     ompt_callback_t callback) {
  switch (which) {

// One `case` per event; the trailing statement terminator is expected to come
// from the expansion of OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT (defined outside
// this section).
#define ompt_event_macro(event_name, callback_type, event_id)                  \
  case ompt_##event_name:                                                      \
    ompt_multiplex_own_callbacks.ompt_##event_name = (callback_type)callback;  \
    if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
      return ompt_multiplex_implementation_status.ompt_##event_name =          \
                 ompt_multiplex_set_callback(                                  \
                     ompt_##event_name,                                        \
                     (ompt_callback_t)&ompt_multiplex_##event_name);           \
    else                                                                       \
      return ompt_multiplex_implementation_status.ompt_##event_name

    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)

#undef ompt_event_macro

  default:
    return ompt_set_error;
  }
}

// ompt_set_callback as handed to the client tool.
//
// For the event selected by `which`, the callback is stored in
// ompt_multiplex_client_callbacks. The runtime itself only ever sees the
// multiplexing wrapper ompt_multiplex_<event>: it is registered through
// ompt_multiplex_set_callback the first time either tool asks for the event
// (status still -1, the value written by ompt_multiplex_initialize), and the
// runtime's answer is cached in ompt_multiplex_implementation_status and
// returned. Later requests for the same event return the cached status.
// Events not covered by OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT yield
// ompt_set_error.
OMPT_API_ROUTINE int
ompt_multiplex_client_set_callback(ompt_callbacks_t which,
                                   ompt_callback_t callback) {
  switch (which) {

// One `case` per event; the trailing statement terminator is expected to come
// from the expansion of OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT (defined outside
// this section).
#define ompt_event_macro(event_name, callback_type, event_id)                  \
  case ompt_##event_name:                                                      \
    ompt_multiplex_client_callbacks.ompt_##event_name =                        \
        (callback_type)callback;                                               \
    if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
      return ompt_multiplex_implementation_status.ompt_##event_name =          \
                 ompt_multiplex_set_callback(                                  \
                     ompt_##event_name,                                        \
                     (ompt_callback_t)&ompt_multiplex_##event_name);           \
    else                                                                       \
      return ompt_multiplex_implementation_status.ompt_##event_name

    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)

#undef ompt_event_macro

  default:
    return ompt_set_error;
  }
}

// Function lookup handed to the own tool during initialization.
// The four entry points that need per-tool translation resolve to the
// ompt_multiplex_own_* wrappers; every other name is delegated to the
// runtime's lookup function saved in ompt_multiplex_lookup_function.
ompt_interface_fn_t ompt_multiplex_own_lookup(const char *name) {
  if (strcmp(name, "ompt_set_callback") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_own_set_callback;
  }
  if (strcmp(name, "ompt_get_task_info") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_own_get_task_info;
  }
  if (strcmp(name, "ompt_get_thread_data") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_own_get_thread_data;
  }
  if (strcmp(name, "ompt_get_parallel_info") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_own_get_parallel_info;
  }

  // not intercepted: ask the runtime
  return ompt_multiplex_lookup_function(name);
}

// Function lookup handed to the client tool during initialization.
// The four entry points that need per-tool translation resolve to the
// ompt_multiplex_client_* wrappers; every other name is delegated to the
// runtime's lookup function saved in ompt_multiplex_lookup_function.
ompt_interface_fn_t ompt_multiplex_client_lookup(const char *name) {
  if (strcmp(name, "ompt_set_callback") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_client_set_callback;
  }
  if (strcmp(name, "ompt_get_task_info") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_client_get_task_info;
  }
  if (strcmp(name, "ompt_get_thread_data") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_client_get_thread_data;
  }
  if (strcmp(name, "ompt_get_parallel_info") == 0) {
    return (ompt_interface_fn_t)&ompt_multiplex_client_get_parallel_info;
  }

  // not intercepted: ask the runtime
  return ompt_multiplex_lookup_function(name);
}

// Initializer installed in the multiplexed ompt_start_tool result.
// Saves the runtime's lookup function and the four runtime entry points the
// multiplexer wraps, marks every event's registration status as "not yet
// requested" (-1), and then initializes the own tool followed by the client
// tool (if one was loaded), handing each its own lookup function and its own
// tool_data. The `data` argument is not used.
// Returns the larger of the two initializers' return values.
int ompt_multiplex_initialize(ompt_function_lookup_t lookup,
                              int initial_device_num, ompt_data_t *data) {
  // remember the runtime's lookup and resolve the wrapped entry points
  ompt_multiplex_lookup_function = lookup;
  ompt_multiplex_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  ompt_multiplex_get_task_info =
      (ompt_get_task_info_t)lookup("ompt_get_task_info");
  ompt_multiplex_get_thread_data =
      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
  ompt_multiplex_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");

  // initialize ompt_multiplex_implementation_status
#define ompt_event_macro(event_name, callback_type, event_id)                  \
  ompt_multiplex_implementation_status.ompt_##event_name = -1

  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)

#undef ompt_event_macro

  // own tool first, then the optional client tool
  int own_status = ompt_multiplex_own_fns->initialize(
      ompt_multiplex_own_lookup, initial_device_num,
      &(ompt_multiplex_own_fns->tool_data));

  int client_status = 0;
  if (ompt_multiplex_client_fns != NULL) {
    client_status = ompt_multiplex_client_fns->initialize(
        ompt_multiplex_client_lookup, initial_device_num,
        &(ompt_multiplex_client_fns->tool_data));
  }

  if (own_status > client_status) {
    return own_status;
  }
  return client_status;
}

// Finalizer installed in the multiplexed ompt_start_tool result.
// Finalizes the client tool (if one was loaded) and then the own tool, i.e.
// in the reverse order of ompt_multiplex_initialize. Each tool receives its
// own tool_data; the `fns` argument is not used.
void ompt_multiplex_finalize(ompt_data_t *fns) {
  if (ompt_multiplex_client_fns != NULL) {
    ompt_multiplex_client_fns->finalize(
        &(ompt_multiplex_client_fns->tool_data));
  }

  ompt_multiplex_own_fns->finalize(
      &(ompt_multiplex_own_fns->tool_data));
}

#ifdef __cplusplus
extern "C" {
#endif

// forward declaration because of name shifting from ompt_start_tool
// to ompt_multiplex_own_start_tool below: the macro at the end of this header
// renames the including tool's own ompt_start_tool to this symbol, and the
// multiplexing ompt_start_tool defined next calls it before that definition
// has been seen.
ompt_start_tool_result_t *
ompt_multiplex_own_start_tool(unsigned int omp_version,
                              const char *runtime_version);

// Multiplexing replacement for the tool's ompt_start_tool entry point.
//
// 1. If the environment variable named by CLIENT_TOOL_LIBRARIES_VAR is set,
//    its value is treated as a ':'-separated list of shared libraries. The
//    entries are tried in order; the first library that can be opened,
//    exports ompt_start_tool and returns a non-NULL result from it becomes
//    the client tool (ompt_multiplex_client_fns). Empty entries are skipped.
// 2. The own tool is started through ompt_multiplex_own_start_tool.
//
// Returns the own tool's result if no client was loaded, the client's result
// if the own tool declined, and otherwise a static result whose
// initialize/finalize functions drive both tools.
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  setup_verbose_init();
  OMPT_VERBOSE_INIT_PRINT(
      "----- START LOGGING OF CLIENT TOOL REGISTRATION -----\n");
  // try loading client tool
  OMPT_VERBOSE_INIT_PRINT("Search for " CLIENT_TOOL_LIBRARIES_VAR
                          " env var... ");
  const char *tool_libs = getenv(CLIENT_TOOL_LIBRARIES_VAR);
  if (tool_libs) {
    OMPT_VERBOSE_INIT_CONTINUED_PRINT("Sucess.\n");
    OMPT_VERBOSE_INIT_PRINT(CLIENT_TOOL_LIBRARIES_VAR " = %s\n", tool_libs);
    // copy environment variable; the copy is split in place at each ':'
    char *tool_libs_buffer = strdup(tool_libs);
    if (!tool_libs_buffer) {
      printf("strdup Error (%i)\n", errno);
      exit(-1);
    }

    // The list length is loop-invariant: compute it once and keep all
    // offsets in size_t to avoid signed/unsigned comparisons.
    const size_t tool_libs_len = strlen(tool_libs);
    size_t progress = 0;
    // Reset dl-error
    dlerror();
    while (progress < tool_libs_len) {
      ompt_multiplex_client_fns = NULL;
      ompt_start_tool_result_t *(*client_start_tool)(unsigned int,
                                                     const char *) = NULL;
      OMPT_VERBOSE_INIT_PRINT(
          "Look for candidates within " CLIENT_TOOL_LIBRARIES_VAR "...\n");
      // find the end of the current entry and terminate it
      size_t tmp_progress = progress;
      while (tmp_progress < tool_libs_len &&
             tool_libs_buffer[tmp_progress] != ':')
        tmp_progress++;
      if (tmp_progress < tool_libs_len)
        tool_libs_buffer[tmp_progress] = 0;
      char *fname = tool_libs_buffer + progress;
      if (fname[0] == '\0') {
        // An empty entry (leading or doubled ':') names no library. Do not
        // hand it to dlopen(): NOTE(review) glibc appears to treat "" like
        // NULL, i.e. the main program, whose ompt_start_tool may be this very
        // function - confirm for other libcs.
        progress = tmp_progress + 1;
        continue;
      }
      OMPT_VERBOSE_INIT_PRINT("Try out one candidate...\n");
      OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
      void *h = dlopen(fname, RTLD_LAZY);
      if (h) {
        client_start_tool =
            (ompt_start_tool_result_t * (*)(unsigned int, const char *))
                dlsym(h, "ompt_start_tool");
        if (client_start_tool &&
            (ompt_multiplex_client_fns =
                 (*client_start_tool)(omp_version, runtime_version))) {
          OMPT_VERBOSE_INIT_CONTINUED_PRINT("Sucess.\n");
          OMPT_VERBOSE_INIT_PRINT(
              "Tool was started and is using the OMPT interface.\n");
          break;
        } else {
          // dlerror() is NULL when dlsym succeeded but the tool declined;
          // never pass NULL to %s.
          const char *dl_msg = dlerror();
          if (!dl_msg)
            dl_msg = "(no dlerror message)";
          OMPT_VERBOSE_INIT_CONTINUED_PRINT(
              "Failed: client_start_tool = %p, ompt_multiplex_client_fns = %p, "
              "%s\n",
              (void *)client_start_tool, (void *)ompt_multiplex_client_fns,
              dl_msg);
        }
      } else {
        // dlerror() clears the pending message when read, so fetch it once
        // and reuse it for both outputs.
        const char *dl_msg = dlerror();
        if (!dl_msg)
          dl_msg = "(no dlerror message)";
        OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n", dl_msg);
        printf("Loading %s from %s failed with: %s\n", fname,
               CLIENT_TOOL_LIBRARIES_VAR, dl_msg);
      }
      progress = tmp_progress + 1;
    }
    free(tool_libs_buffer);
    OMPT_VERBOSE_INIT_PRINT(
        "----- END LOGGING OF CLIENT TOOL REGISTRATION -----\n");
  }
  // load own tool
  OMPT_VERBOSE_INIT_PRINT(
      "----- START LOGGING OF OWN TOOL REGISTRATION -----\n");
  ompt_multiplex_own_fns =
      ompt_multiplex_own_start_tool(omp_version, runtime_version);
  OMPT_VERBOSE_INIT_PRINT("ompt_multiplex_own_fns = %p\n",
                          ompt_multiplex_own_fns);
  OMPT_VERBOSE_INIT_PRINT("----- END LOGGING OF OWN TOOL REGISTRATION -----\n");
  // return multiplexed versions
  static ompt_start_tool_result_t ompt_start_tool_result = {
      &ompt_multiplex_initialize, &ompt_multiplex_finalize, {0}};
  // close the verbose log unless it is one of the standard streams
  if (verbose_init && verbose_file != stderr && verbose_file != stdout)
    fclose(verbose_file);
  // only one tool active: hand the runtime that tool's result directly
  if (!ompt_multiplex_client_fns)
    return ompt_multiplex_own_fns;
  if (!ompt_multiplex_own_fns)
    return ompt_multiplex_client_fns;
  return &ompt_start_tool_result;
}
#ifdef __cplusplus
}
#endif

// We rename the ompt_start_tool function of the OMPT tool and call the
// renamed function from the ompt_start_tool function defined above.
// Any ompt_start_tool definition that follows the inclusion of this header is
// therefore compiled as ompt_multiplex_own_start_tool.
#define ompt_start_tool ompt_multiplex_own_start_tool

#endif /* OMPT_MULTIPLEX_H */