option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
"Compile Fortran runtime as CUDA sources (experimental)" OFF
)
option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
"Do not compile global variables' definitions when producing PTX library" OFF
)
set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")
set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
"Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
"List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")
macro(enable_cuda_compilation name files)
if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
if (BUILD_SHARED_LIBS)
message(FATAL_ERROR
"BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
)
endif()
enable_language(CUDA)
# TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
# work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
# Treat all supported sources as CUDA files.
set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
set(CUDA_COMPILE_OPTIONS)
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
# Allow varargs.
set(CUDA_COMPILE_OPTIONS
-Xclang -fcuda-allow-variadic-functions
)
endif()
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
set(CUDA_COMPILE_OPTIONS
--expt-relaxed-constexpr
# Disable these warnings:
# 'long double' is treated as 'double' in device code
-Xcudafe --diag_suppress=20208
-Xcudafe --display_error_number
)
endif()
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
"${CUDA_COMPILE_OPTIONS}"
)
if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
# When using libcudacxx headers files, we have to use them
# for all files of F18 runtime.
include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include)
add_compile_definitions(RT_USE_LIBCUDACXX=1)
endif()
# Add an OBJECT library consisting of CUDA PTX.
llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
if (FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS)
target_compile_definitions(obj.${name}PTX
PRIVATE FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
)
endif()
endif()
endmacro()
macro(enable_omp_offload_compilation files)
if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
# 'host_device' build only works with Clang compiler currently.
# The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
# the in-tree built Clang. We may have a mode that would use the in-tree
# built Clang.
#
# 'nohost' is supposed to produce an LLVM Bitcode library,
# and it has to be done with a C/C++ compiler producing LLVM Bitcode
# compatible with the LLVM toolchain version distributed with the Flang
# compiler.
# In general, the in-tree built Clang should be used for 'nohost' build.
# Note that 'nohost' build does not produce the host version of Flang
# runtime library, so there will be two separate distributable objects.
# 'nohost' build is a TODO.
if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
endif()
if (BUILD_SHARED_LIBS)
message(FATAL_ERROR
"BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
)
endif()
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
"${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
set(all_amdgpu_architectures
"gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
"gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
"gfx1152"
)
set(all_nvptx_architectures
"sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
)
set(all_gpu_architectures
"${all_amdgpu_architectures};${all_nvptx_architectures}"
)
# TODO: support auto detection on the build system.
if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
endif()
list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)
string(REPLACE ";" "," compile_for_architectures
"${FLANG_OMP_DEVICE_ARCHITECTURES}"
)
set(OMP_COMPILE_OPTIONS
-fopenmp
-fvisibility=hidden
-fopenmp-cuda-mode
--offload-arch=${compile_for_architectures}
# Force LTO for the device part.
-foffload-lto
)
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
"${OMP_COMPILE_OPTIONS}"
)
# Enable "declare target" in the source code.
set_source_files_properties(${files}
PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
)
else()
message(FATAL_ERROR
"Flang runtime build is not supported for these compilers:\n"
"CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
"CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
endif()
endif()
endmacro()