From 5b7e0ad0962ca3e50879d008f0ea98ead9ee051f Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Thu, 5 Oct 2023 17:29:12 +0200
Subject: [PATCH 1/5] spla: init at 1.5.5

spla: fix openmp on darwin
---
 pkgs/by-name/sp/spla/package.nix | 68 ++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 pkgs/by-name/sp/spla/package.nix

diff --git a/pkgs/by-name/sp/spla/package.nix b/pkgs/by-name/sp/spla/package.nix
new file mode 100644
index 000000000000..3143fbeb7316
--- /dev/null
+++ b/pkgs/by-name/sp/spla/package.nix
@@ -0,0 +1,68 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, mpi
+, blas
+, gfortran
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocblas
+}:
+
+# gpuBackend selects the optional GPU stack; any other value fails evaluation here.
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "spla";
+  version = "1.5.5";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-71QpwTsRogH+6Bik9DKwezl9SqwoLxQt4SZ7zw5X6DE=";
+  };
+
+  # Rewrite the rocblas include to the header below the rocblas/ subdirectory.
+  postPatch = ''
+    substituteInPlace src/gpu_util/gpu_blas_api.hpp \
+      --replace '#include <rocblas.h>' '#include <rocblas/rocblas.h>'
+  '';
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+  ];
+
+  buildInputs = [
+    blas
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DSPLA_OMP=ON"
+    "-DSPLA_FORTRAN=ON"
+    "-DSPLA_INSTALL=ON"
+    # Required due to broken CMake files
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ]
+  ++ lib.optional (gpuBackend == "cuda") "-DSPLA_GPU_BACKEND=CUDA"
+  ++ lib.optional (gpuBackend == "rocm") "-DSPLA_GPU_BACKEND=ROCM"
+  ;
+
+  meta = with lib; {
+    description = "Specialized Parallel Linear Algebra, providing distributed GEMM functionality for specific matrix distributions with optional GPU acceleration";
+    homepage = "https://github.com/eth-cscs/spla";
+    license = licenses.bsd3;
+    maintainers = [ maintainers.sheepforce ];
+  };
+}

From 0365c4b1d07ad7d67388068d19e1d5055aec8dc7 Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Thu, 5 Oct 2023 17:29:40 +0200
Subject: [PATCH 2/5] spfft: init at 1.0.6

spfft: fix openmp on darwin
---
 pkgs/by-name/sp/spfft/package.nix | 68 +++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 pkgs/by-name/sp/spfft/package.nix

diff --git a/pkgs/by-name/sp/spfft/package.nix b/pkgs/by-name/sp/spfft/package.nix
new file mode 100644
index 000000000000..dcc43ccd2446
--- /dev/null
+++ b/pkgs/by-name/sp/spfft/package.nix
@@ -0,0 +1,68 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, fftw
+, cmake
+, mpi
+, gfortran
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocfft
+, hipfft
+}:
+
+# gpuBackend selects the optional GPU stack; any other value fails evaluation here.
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "SpFFT";
+  version = "1.0.6";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-70fPbIYbW50CoMdRS93hZKSbMEIQvZGFNE+eiRvuw0o=";
+  };
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+  ];
+
+  buildInputs = [
+    fftw
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocfft hipfft ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DSPFFT_OMP=ON"
+    "-DSPFFT_MPI=ON"
+    "-DSPFFT_SINGLE_PRECISION=OFF"
+    "-DSPFFT_FORTRAN=ON"
+    # Required due to broken CMake files
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ]
+  ++ lib.optional (gpuBackend == "cuda") "-DSPFFT_GPU_BACKEND=CUDA"
+  ++ lib.optionals (gpuBackend == "rocm") [
+    "-DSPFFT_GPU_BACKEND=ROCM"
+    "-DHIP_ROOT_DIR=${hip}"
+  ];
+
+
+  meta = with lib; {
+    description = "Sparse 3D FFT library with MPI, OpenMP, CUDA and ROCm support";
+    homepage = "https://github.com/eth-cscs/SpFFT";
+    license = licenses.bsd3;
+    maintainers = [ maintainers.sheepforce ];
+    platforms = platforms.linux;
+  };
+}

From 3a7879533e3d3d8f0ddbb1945942575b3a812d8b Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Mon, 9 Oct 2023 10:46:30 +0200
Subject: [PATCH 3/5] costa: init at 2.2.2

costa: limit to linux
---
 pkgs/by-name/co/costa/package.nix | 41 +++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 pkgs/by-name/co/costa/package.nix

diff --git a/pkgs/by-name/co/costa/package.nix b/pkgs/by-name/co/costa/package.nix
new file mode 100644
index 000000000000..3dacce655055
--- /dev/null
+++ b/pkgs/by-name/co/costa/package.nix
@@ -0,0 +1,41 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, mpi
+, scalapack
+, llvmPackages
+}:
+
+stdenv.mkDerivation rec {
+  pname = "COSTA";
+  version = "2.2.2";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-jiAyZXC7wiuEnOLsQFFLxhN3AsGXN09q/gHC2Hrb2gg=";
+  };
+
+  nativeBuildInputs = [ cmake ];
+
+  buildInputs = [ scalapack ] ++ lib.optional stdenv.isDarwin llvmPackages.openmp;
+
+  propagatedBuildInputs = [ mpi ];
+
+  # Build against the scalapack from buildInputs, located via SCALAPACK_ROOT.
+  cmakeFlags = [
+    "-DCOSTA_SCALAPACK=CUSTOM"
+    "-DSCALAPACK_ROOT=${scalapack}"
+  ];
+
+
+  meta = with lib; {
+    description = "Distributed Communication-Optimal Shuffle and Transpose Algorithm";
+    homepage = "https://github.com/eth-cscs/COSTA";
+    license = licenses.bsd3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.sheepforce ];
+  };
+}

From 7b50f783336f57e5d39b160d78b8aa24d6b07ca3 Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Mon, 9 Oct 2023 10:48:01 +0200
Subject: [PATCH 4/5] sirius: init at 7.4.3

sirius: limit to linux
---
 pkgs/by-name/si/sirius/package.nix | 118 +++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 pkgs/by-name/si/sirius/package.nix

diff --git a/pkgs/by-name/si/sirius/package.nix b/pkgs/by-name/si/sirius/package.nix
new file mode 100644
index 000000000000..05d049a7d45b
--- /dev/null
+++ b/pkgs/by-name/si/sirius/package.nix
@@ -0,0 +1,118 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, pkg-config
+, mpi
+, mpiCheckPhaseHook
+, openssh
+, gfortran
+, blas
+, lapack
+, gsl
+, libxc
+, hdf5
+, spglib
+, spfft
+, spla
+, costa
+, scalapack
+, boost
+, eigen
+, libvdwxc
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocblas
+}:
+
+# gpuBackend selects the optional GPU stack; any other value fails evaluation here.
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "SIRIUS";
+  version = "7.4.3";
+
+  src = fetchFromGitHub {
+    owner = "electronic-structure";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-s4rO+dePvtvn41wxCvbqgQGrEckWmfng7sPX2M8OPB0=";
+  };
+
+  # Rewrite the rocblas include to the header below the rocblas/ subdirectory.
+  postPatch = ''
+    substituteInPlace src/gpu/acc_blas_api.hpp \
+      --replace '#include <rocblas.h>' '#include <rocblas/rocblas.h>'
+  '';
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+    pkg-config
+  ];
+
+  buildInputs = [
+    blas
+    lapack
+    gsl
+    libxc
+    hdf5
+    spglib
+    spfft
+    spla
+    costa
+    scalapack
+    boost
+    eigen
+    libvdwxc
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DUSE_SCALAPACK=ON"
+    "-DBUILD_TESTING=ON"
+    "-DUSE_VDWXC=ON"
+    "-DCREATE_FORTRAN_BINDINGS=ON"
+    "-DUSE_OPENMP=ON"
+  ]
+  ++ lib.optionals (gpuBackend == "cuda") [
+    "-DUSE_CUDA=ON"
+    "-DCUDA_TOOLKIT_ROOT_DIR=${cudaPackages.cudatoolkit}"
+  ]
+  ++ lib.optionals (gpuBackend == "rocm") [
+    "-DUSE_ROCM=ON"
+    "-DHIP_ROOT_DIR=${hip}"
+  ];
+
+  doCheck = true;
+
+  # Can not run parallel checks generally as it requires exactly multiples of 4 MPI ranks
+  checkPhase = ''
+    runHook preCheck
+
+    ctest --output-on-failure --label-exclude integration_test
+    ctest --output-on-failure -L cpu_serial
+
+    runHook postCheck
+  '';
+
+  nativeCheckInputs = [
+    mpiCheckPhaseHook
+    openssh
+  ];
+
+  meta = with lib; {
+    description = "Domain specific library for electronic structure calculations";
+    homepage = "https://github.com/electronic-structure/SIRIUS";
+    license = licenses.bsd2;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.sheepforce ];
+  };
+}

From dcd7efd1f993419527a63313d7158cef746b0106 Mon Sep 17 00:00:00 2001
From: Phillip Seeber
Date: Thu, 5 Oct 2023 14:24:58 +0200
Subject: [PATCH 5/5] cp2k: enable GPU offloading, sirius planewaves and
 optimisations

---
 .../science/chemistry/cp2k/default.nix        | 98 ++++++++++++++++---
 1 file changed, 85 insertions(+), 13 deletions(-)

diff --git a/pkgs/applications/science/chemistry/cp2k/default.nix b/pkgs/applications/science/chemistry/cp2k/default.nix
index e34810a3982b..052d791c0bb7 100644
--- a/pkgs/applications/science/chemistry/cp2k/default.nix
+++ b/pkgs/applications/science/chemistry/cp2k/default.nix
@@ -1,15 +1,57 @@
-{ lib, stdenv, fetchFromGitHub, mpiCheckPhaseHook, python3, gfortran, blas, lapack
-, fftw, libint, libvori, libxc, mpi, gsl, scalapack, openssh, makeWrapper
-, libxsmm, spglib, which, pkg-config, plumed, zlib
+{ lib
+, stdenv
+, fetchFromGitHub
+, mpiCheckPhaseHook
+, python3
+, gfortran
+, blas
+, lapack
+, fftw
+, libint
+, libvori
+, libxc
+, mpi
+, gsl
+, scalapack
+, openssh
+, makeWrapper
+, libxsmm
+, spglib
+, which
+, pkg-config
+, plumed
+, zlib
+, hdf5-fortran
+, sirius
+, libvdwxc
+, spla
+, spfft
 , enableElpa ? false
 , elpa
-} :
+, gpuBackend ? "none"
+, cudaPackages
+# gpuVersion needs to be set for both CUDA as well as ROCM hardware.
+# gpuArch is only required for the ROCM stack.
+# Change to a value suitable for your target GPU.
+# For AMD values see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2v-rocmhip-support-for-amd-gpu
+# and for Nvidia see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2i-cuda-optional-improved-performance-on-gpu-systems
+, gpuVersion ? "Mi100"
+, gpuArch ? "gfx908"
+, rocm-core
+, hip
+, hipblas
+, hipfft
+, rocblas
+}:
+
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
 
 let
   cp2kVersion = "psmp";
   arch = "Linux-x86-64-gfortran";
 
-in stdenv.mkDerivation rec {
+in
+stdenv.mkDerivation rec {
   pname = "cp2k";
   version = "2023.2";
 
@@ -36,7 +78,16 @@ in stdenv.mkDerivation rec {
     lapack
     plumed
     zlib
-  ] ++ lib.optional enableElpa elpa;
+    hdf5-fortran
+    sirius
+    spla
+    spfft
+    libvdwxc
+  ]
+  ++ lib.optional enableElpa elpa
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocm-core hipblas hipfft rocblas ]
+  ;
 
   propagatedBuildInputs = [ mpi ];
   propagatedUserEnvPkgs = [ mpi ];
@@ -46,7 +97,7 @@ in stdenv.mkDerivation rec {
     "VERSION=${cp2kVersion}"
   ];
 
-  doCheck = true;
+  doCheck = gpuBackend == "none";
 
   enableParallelBuilding = true;
 
@@ -64,25 +115,46 @@ in stdenv.mkDerivation rec {
     FC         = mpif90
     LD         = mpif90
     AR         = ar -r
+    ${lib.strings.optionalString (gpuBackend == "cuda") ''
+    OFFLOAD_CC = nvcc
+    OFFLOAD_FLAGS = -O3 -g -w --std=c++11
+    OFFLOAD_TARGET = cuda
+    GPUVER = ${gpuVersion}
+    CXX = mpicxx
+    CXXFLAGS = -std=c++11 -fopenmp
+    ''}
+    ${lib.strings.optionalString (gpuBackend == "rocm") ''
+    GPUVER = ${gpuVersion}
+    OFFLOAD_CC = hipcc
+    OFFLOAD_FLAGS = -fopenmp -m64 -pthread -fPIC -D__GRID_HIP -O2 --offload-arch=${gpuArch} --rocm-path=${rocm-core}
+    OFFLOAD_TARGET = hip
+    CXX = mpicxx
+    CXXFLAGS = -std=c++11 -fopenmp -D__HIP_PLATFORM_AMD__
+    ''}
     DFLAGS     = -D__FFTW3 -D__LIBXC -D__LIBINT -D__parallel -D__SCALAPACK \
                  -D__MPI_VERSION=3 -D__F2008 -D__LIBXSMM -D__SPGLIB \
                  -D__MAX_CONTR=4 -D__LIBVORI ${lib.optionalString enableElpa "-D__ELPA"} \
-                 -D__PLUMED2
-    CFLAGS    = -fopenmp
+                 -D__PLUMED2 -D__HDF5 -D__GSL -D__SIRIUS -D__LIBVDWXC -D__SPFFT -D__SPLA \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") "-D__OFFLOAD_CUDA -D__DBCSR_ACC"} \
+                 ${lib.strings.optionalString (gpuBackend == "rocm") "-D__OFFLOAD_HIP -D__DBCSR_ACC -D__NO_OFFLOAD_PW"}
+    CFLAGS    = -fopenmp -I${lib.getDev hdf5-fortran}/include -I${lib.getDev gsl}/include
     FCFLAGS    = \$(DFLAGS) -O2 -ffree-form -ffree-line-length-none \
                  -ftree-vectorize -funroll-loops -msse2 \
                  -std=f2008 \
                  -fopenmp -ftree-vectorize -funroll-loops \
-                 -I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include \
-                 -I${libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"}
+                 -I${lib.getDev libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"} \
+                 -I${lib.getDev sirius}/include/sirius \
+                 -I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include
     LIBS       = -lfftw3 -lfftw3_threads \
                  -lscalapack -lblas -llapack \
                  -lxcf03 -lxc -lxsmmf -lxsmm -lsymspg \
                  -lint2 -lstdc++ -lvori \
                  -lgomp -lpthread -lm \
                  -fopenmp ${lib.optionalString enableElpa "$(pkg-config --libs elpa)"} \
-                 -lz -ldl -lstdc++ ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
-                 -lplumed
+                 -lz -ldl ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
+                 -lplumed -lhdf5_fortran -lhdf5_hl -lhdf5 -lgsl -lsirius -lspla -lspfft -lvdwxc \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") "-lcudart -lnvrtc -lcuda -lcublas"} \
+                 ${lib.strings.optionalString (gpuBackend == "rocm") "-lamdhip64 -lhipfft -lhipblas -lrocblas"}
     LDFLAGS    = \$(FCFLAGS) \$(LIBS)
     include ${plumed}/lib/plumed/src/lib/Plumed.inc
     EOF