mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-04-15 12:27:57 +00:00
rocfft: split kernel compilation into separate derivations
To avoid "output limit exceeded" errors in Hydra, we build the kernel device libs and the kernel RTC cache database in separate derivations.
This commit is contained in:
parent
e88bc03e4b
commit
c4a937a9f6
@ -1,22 +1,93 @@
|
||||
{ lib
|
||||
{ rocfft
|
||||
, lib
|
||||
, stdenv
|
||||
, fetchFromGitHub
|
||||
, rocmUpdateScript
|
||||
, cmake
|
||||
, rocm-cmake
|
||||
, rocrand
|
||||
, hip
|
||||
, openmp
|
||||
, sqlite
|
||||
, python3
|
||||
, gtest
|
||||
, rocm-cmake
|
||||
, sqlite
|
||||
, boost
|
||||
, fftw
|
||||
, fftwFloat
|
||||
, gtest
|
||||
, openmp
|
||||
, rocrand
|
||||
, buildTests ? false
|
||||
, buildBenchmarks ? false
|
||||
# NOTE: Update the default GPU targets on every update
|
||||
, gpuTargets ? [
|
||||
"gfx803"
|
||||
"gfx900"
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx1030"
|
||||
"gfx1100"
|
||||
"gfx1102"
|
||||
]
|
||||
}:
|
||||
|
||||
let
|
||||
# To avoid output limit exceeded errors in hydra, we build kernel
|
||||
# device libs and the kernel RTC cache database in separate derivations
|
||||
kernelDeviceLibs = map
|
||||
(target:
|
||||
(rocfft.overrideAttrs (prevAttrs: {
|
||||
pname = "rocfft-device-${target}";
|
||||
|
||||
patches = prevAttrs.patches ++ [
|
||||
# Add back install rule for device library
|
||||
# This workaround is needed because rocm_install_targets
|
||||
# doesn't support an EXCLUDE_FROM_ALL option
|
||||
./device-install.patch
|
||||
];
|
||||
|
||||
buildFlags = [ "rocfft-device-${target}" ];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
cmake --install . --component device
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
})).override {
|
||||
buildTests = false;
|
||||
buildBenchmarks = false;
|
||||
gpuTargets = [ target ];
|
||||
}
|
||||
)
|
||||
gpuTargets;
|
||||
|
||||
# TODO: Figure out how to also split this by GPU target
|
||||
#
|
||||
# It'll be a bit more complicated than what we're doing for the kernel
|
||||
# device libs, because the kernel cache needs to be compiled into
|
||||
# one sqlite database (whereas the device libs can be linked into
|
||||
# rocfft as separate libraries for each GPU target).
|
||||
#
|
||||
# It's not clear why this needs to even be a db in the first place.
|
||||
# It would simplify things A LOT if we could just store these
|
||||
# pre-compiled kernels as files (but that'd need a lot of patching).
|
||||
kernelRtcCache = (rocfft.overrideAttrs (_: {
|
||||
pname = "rocfft-kernel-cache";
|
||||
|
||||
buildFlags = [ "rocfft_kernel_cache_target" ];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
cmake --install . --component kernel_cache
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
requiredSystemFeatures = [ "big-parallel" ];
|
||||
})).override {
|
||||
buildTests = false;
|
||||
buildBenchmarks = false;
|
||||
};
|
||||
in
|
||||
stdenv.mkDerivation (finalAttrs: {
|
||||
pname = "rocfft";
|
||||
version = "5.4.3";
|
||||
@ -36,23 +107,29 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
rocm-cmake
|
||||
hip
|
||||
patches = [
|
||||
# Exclude kernel compilation & installation from "all" target,
|
||||
# and split device libraries by GPU target
|
||||
./split-kernel-compilation.patch
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
sqlite
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
hip
|
||||
python3
|
||||
rocm-cmake
|
||||
];
|
||||
|
||||
buildInputs = (lib.optionals (finalAttrs.pname == "rocfft") kernelDeviceLibs) ++ [
|
||||
sqlite
|
||||
] ++ lib.optionals buildTests [
|
||||
gtest
|
||||
] ++ lib.optionals (buildTests || buildBenchmarks) [
|
||||
rocrand
|
||||
boost
|
||||
fftw
|
||||
fftwFloat
|
||||
openmp
|
||||
rocrand
|
||||
];
|
||||
|
||||
propagatedBuildInputs = lib.optionals buildTests [
|
||||
@ -70,6 +147,7 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
"-DCMAKE_INSTALL_BINDIR=bin"
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
"-DCMAKE_INSTALL_INCLUDEDIR=include"
|
||||
"-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
|
||||
] ++ lib.optionals buildTests [
|
||||
"-DBUILD_CLIENTS_TESTS=ON"
|
||||
] ++ lib.optionals buildBenchmarks [
|
||||
@ -77,7 +155,9 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
"-DBUILD_CLIENTS_SAMPLES=ON"
|
||||
];
|
||||
|
||||
postInstall = lib.optionalString buildTests ''
|
||||
postInstall = lib.optionalString (finalAttrs.pname == "rocfft") ''
|
||||
ln -s ${kernelRtcCache}/lib/rocfft_kernel_cache.db "$out/lib"
|
||||
'' + lib.optionalString buildTests ''
|
||||
mkdir -p $test/{bin,lib/fftw}
|
||||
cp -a $out/bin/* $test/bin
|
||||
ln -s ${fftw}/lib/libfftw*.so $test/lib/fftw
|
||||
@ -101,10 +181,10 @@ stdenv.mkDerivation (finalAttrs: {
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "FFT implementation for ROCm ";
|
||||
description = "FFT implementation for ROCm";
|
||||
homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT";
|
||||
license = with licenses; [ mit ];
|
||||
maintainers = teams.rocm.members;
|
||||
maintainers = with maintainers; [ kira-bruneau ] ++ teams.rocm.members;
|
||||
platforms = platforms.linux;
|
||||
broken = versions.minor finalAttrs.version != versions.minor hip.version;
|
||||
};
|
||||
|
15
pkgs/development/libraries/rocfft/device-install.patch
Normal file
15
pkgs/development/libraries/rocfft/device-install.patch
Normal file
@ -0,0 +1,15 @@
|
||||
diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt
|
||||
index 73a8ec9..9bfd4b8 100644
|
||||
--- a/library/src/device/CMakeLists.txt
|
||||
+++ b/library/src/device/CMakeLists.txt
|
||||
@@ -255,4 +255,10 @@ foreach( sub ${AMDGPU_TARGETS} )
|
||||
if( NOT BUILD_SHARED_LIBS )
|
||||
set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" )
|
||||
endif( )
|
||||
+
|
||||
+ rocm_install_targets(
|
||||
+ TARGETS
|
||||
+ rocfft-device-${sub}
|
||||
+ COMPONENT device
|
||||
+ )
|
||||
endforeach()
|
124
pkgs/development/libraries/rocfft/split-kernel-compilation.patch
Normal file
124
pkgs/development/libraries/rocfft/split-kernel-compilation.patch
Normal file
@ -0,0 +1,124 @@
|
||||
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
|
||||
index 3a16304..606b711 100644
|
||||
--- a/library/src/CMakeLists.txt
|
||||
+++ b/library/src/CMakeLists.txt
|
||||
@@ -250,12 +250,12 @@ foreach( target
|
||||
|
||||
endforeach()
|
||||
|
||||
-add_executable( rocfft_aot_helper
|
||||
+add_executable( rocfft_aot_helper EXCLUDE_FROM_ALL
|
||||
rocfft_aot_helper.cpp
|
||||
rocfft_stub.cpp
|
||||
)
|
||||
|
||||
-add_executable( rocfft_config_search
|
||||
+add_executable( rocfft_config_search EXCLUDE_FROM_ALL
|
||||
rocfft_config_search.cpp
|
||||
rocfft_stub.cpp
|
||||
)
|
||||
@@ -279,10 +279,10 @@ endif()
|
||||
|
||||
target_link_libraries( rocfft PRIVATE ${ROCFFT_DEVICE_LINK_LIBS} )
|
||||
|
||||
-target_link_libraries( rocfft PRIVATE rocfft-device-0 )
|
||||
-target_link_libraries( rocfft PRIVATE rocfft-device-1 )
|
||||
-target_link_libraries( rocfft PRIVATE rocfft-device-2 )
|
||||
-target_link_libraries( rocfft PRIVATE rocfft-device-3 )
|
||||
+foreach( sub ${AMDGPU_TARGETS} )
|
||||
+ target_link_libraries( rocfft PRIVATE -lrocfft-device-${sub} )
|
||||
+endforeach()
|
||||
+
|
||||
foreach( target rocfft rocfft_aot_helper rocfft_config_search )
|
||||
# RTC uses dladdr to find the RTC helper program
|
||||
if( NOT WIN32 )
|
||||
@@ -347,7 +347,7 @@ add_custom_command(
|
||||
DEPENDS rocfft_aot_helper rocfft_rtc_helper
|
||||
COMMENT "Compile kernels into shipped cache file"
|
||||
)
|
||||
-add_custom_target( rocfft_kernel_cache_target ALL
|
||||
+add_custom_target( rocfft_kernel_cache_target
|
||||
DEPENDS rocfft_kernel_cache.db
|
||||
VERBATIM
|
||||
)
|
||||
@@ -392,7 +392,8 @@ else()
|
||||
endif()
|
||||
rocm_install(FILES ${ROCFFT_KERNEL_CACHE_PATH}
|
||||
DESTINATION "${ROCFFT_KERNEL_CACHE_INSTALL_DIR}"
|
||||
- COMPONENT runtime
|
||||
+ COMPONENT kernel_cache
|
||||
+ EXCLUDE_FROM_ALL
|
||||
)
|
||||
|
||||
# PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
|
||||
diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt
|
||||
index 9f7b85f..73a8ec9 100644
|
||||
--- a/library/src/device/CMakeLists.txt
|
||||
+++ b/library/src/device/CMakeLists.txt
|
||||
@@ -170,11 +170,11 @@ list( SORT rocfft_device_source )
|
||||
# functions callable by rocFFT and depends on amdhip64, and another
|
||||
# one usable by AOT RTC that contains no device code
|
||||
list( FILTER rocfft_device_source EXCLUDE REGEX function_pool.cpp )
|
||||
-add_library( rocfft-function-pool OBJECT
|
||||
+add_library( rocfft-function-pool OBJECT EXCLUDE_FROM_ALL
|
||||
function_pool.cpp
|
||||
)
|
||||
target_compile_definitions( rocfft-function-pool PRIVATE FUNCTION_POOL_STANDALONE_BODY= )
|
||||
-add_library( rocfft-function-pool-standalone OBJECT
|
||||
+add_library( rocfft-function-pool-standalone OBJECT EXCLUDE_FROM_ALL
|
||||
function_pool.cpp
|
||||
)
|
||||
target_compile_definitions( rocfft-function-pool-standalone PRIVATE FUNCTION_POOL_STANDALONE_BODY={} )
|
||||
@@ -193,26 +193,15 @@ foreach( pool rocfft-function-pool rocfft-function-pool-standalone )
|
||||
add_dependencies(${pool} gen_headers_target)
|
||||
endforeach()
|
||||
|
||||
-list( LENGTH rocfft_device_source rocfft_device_source_len )
|
||||
-math(EXPR split_len "${rocfft_device_source_len} / 4")
|
||||
-math(EXPR split_idx_2 "${rocfft_device_source_len} / 4 * 2")
|
||||
-math(EXPR split_idx_3 "${rocfft_device_source_len} / 4 * 3")
|
||||
-
|
||||
-list( SUBLIST rocfft_device_source 0 ${split_len} rocfft_device_source_0 )
|
||||
-list( SUBLIST rocfft_device_source ${split_len} ${split_len} rocfft_device_source_1 )
|
||||
-list( SUBLIST rocfft_device_source ${split_idx_2} ${split_len} rocfft_device_source_2 )
|
||||
-list( SUBLIST rocfft_device_source ${split_idx_3} -1 rocfft_device_source_3 )
|
||||
-
|
||||
-foreach( sub RANGE 3 )
|
||||
- set( rocfft_device_source_var rocfft_device_source_${sub} )
|
||||
+foreach( sub ${AMDGPU_TARGETS} )
|
||||
if(NOT SINGLELIB)
|
||||
- add_library( rocfft-device-${sub}
|
||||
- ${${rocfft_device_source_var}} )
|
||||
+ add_library( rocfft-device-${sub} EXCLUDE_FROM_ALL
|
||||
+ ${rocfft_device_source} )
|
||||
else()
|
||||
# Compile the device lib as a static library, which is then linked
|
||||
# into librocfft.so Useful for testing purposes.
|
||||
- add_library( rocfft-device-${sub} STATIC
|
||||
- ${${rocfft_device_source_var}} )
|
||||
+ add_library( rocfft-device-${sub} STATIC EXCLUDE_FROM_ALL
|
||||
+ ${rocfft_device_source} )
|
||||
|
||||
# if we're building singlelib, we don't want to export any of the
|
||||
# device library symbols to the main library
|
||||
@@ -241,9 +230,7 @@ foreach( sub RANGE 3 )
|
||||
# Set AMD GPU architecture options
|
||||
|
||||
# Enable compilation of desired architectures
|
||||
- foreach( target ${AMDGPU_TARGETS} )
|
||||
- target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${target} )
|
||||
- endforeach( )
|
||||
+ target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${sub} )
|
||||
|
||||
target_include_directories( rocfft-device-${sub}
|
||||
PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
@@ -268,9 +255,4 @@ foreach( sub RANGE 3 )
|
||||
if( NOT BUILD_SHARED_LIBS )
|
||||
set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" )
|
||||
endif( )
|
||||
-
|
||||
- rocm_install_targets(
|
||||
- TARGETS
|
||||
- rocfft-device-${sub}
|
||||
- )
|
||||
endforeach()
|
Loading…
Reference in New Issue
Block a user