nixpkgs/pkgs/by-name/dc/dcgm/package.nix

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

123 lines
3.7 KiB
Nix
Raw Normal View History

2023-04-25 03:42:44 +00:00
{ lib
, gcc11Stdenv
, fetchFromGitHub
2024-03-30 17:17:46 +00:00
, autoAddDriverRunpath
2023-04-25 03:42:44 +00:00
, catch2
, cmake
2024-11-19 18:31:39 +00:00
, ninja
2023-04-25 03:42:44 +00:00
, cudaPackages_11_8
, cudaPackages_12
, boost
2023-04-25 03:42:44 +00:00
, fmt_9
, git
, jsoncpp
, libevent
, plog
, python3
, symlinkJoin
, tclap_1_4
, yaml-cpp
, static ? gcc11Stdenv.hostPlatform.isStatic
2023-04-25 03:42:44 +00:00
}:
let
# DCGM depends on 2 different versions of CUDA at the same time.
# The runtime closure, thankfully, is quite small as it does not
# include the CUDA libraries.
#
# Every set listed here is passed through mkCudaFlags to build cmakeFlags,
# and through getCudaPackages to build disallowedReferences.
cudaPackageSets = [
cudaPackages_11_8
cudaPackages_12
2023-04-25 03:42:44 +00:00
];
# Given one cudaPackages set, return the list of redist packages DCGM needs
# from it. The selection follows upstream's CUDA setup script, c.f.
# https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
getCudaPackages = cudaPkgs: [
  cudaPkgs.cuda_cccl
  cudaPkgs.cuda_cudart
  cudaPkgs.cuda_nvcc
  cudaPkgs.cuda_nvml_dev
  cudaPkgs.libcublas
  cudaPkgs.libcufft
  cudaPkgs.libcurand
];
# Builds CMake flags to add CUDA paths for include and lib.
#
# Takes a single cudaPackages set and returns a list of four lib.cmakeFeature
# entries whose names embed that set's major version:
# CUDA<major>_INCLUDE_DIR, CUDA<major>_LIBS, CUDA<major>_STATIC_LIBS and
# CUDA<major>_STATIC_CUBLAS_LIBS.
mkCudaFlags = cudaPackages:
2023-04-25 03:42:44 +00:00
let
# Major version string; interpolated into the flag names and the join name below.
version = cudaPackages.cudaMajorVersion;
2023-04-25 03:42:44 +00:00
# The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
# combine everything together for headers to work.
headers = symlinkJoin {
name = "cuda-headers-combined-${version}";
# One "<include output>/include" path per package selected by getCudaPackages.
paths = lib.map (pkg: "${lib.getInclude pkg}/include") (getCudaPackages cudaPackages);
2023-04-25 03:42:44 +00:00
};
in [
(lib.cmakeFeature "CUDA${version}_INCLUDE_DIR" "${headers}")
# The libcuda.so used here is the stub shipped in cuda_cudart's `stubs` output.
(lib.cmakeFeature "CUDA${version}_LIBS" "${cudaPackages.cuda_cudart.stubs}/lib/stubs/libcuda.so")
# NOTE(review): despite the *_STATIC_* flag names, the values below are
# shared objects (.so) — presumably intentional; confirm before changing.
(lib.cmakeFeature "CUDA${version}_STATIC_LIBS" "${lib.getLib cudaPackages.cuda_cudart}/lib/libcudart.so")
# Multiple libraries are passed as a single ";"-separated string.
(lib.cmakeFeature "CUDA${version}_STATIC_CUBLAS_LIBS" (lib.concatStringsSep ";" [
"${lib.getLib cudaPackages.libcublas}/lib/libcublas.so"
"${lib.getLib cudaPackages.libcublas}/lib/libcublasLt.so"
]))
];
2023-04-25 03:42:44 +00:00
# gcc11 is required by DCGM's very particular build system
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in gcc11Stdenv.mkDerivation rec {
pname = "dcgm";
version = "3.3.9"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.
2023-04-25 03:42:44 +00:00
# Upstream sources: the v${version} tag of NVIDIA/DCGM on GitHub.
src = fetchFromGitHub {
owner = "NVIDIA";
repo = "DCGM";
rev = "refs/tags/v${version}";
# Must be updated together with `version`.
hash = "sha256-PysxuN5WT7GB0oOvT5ezYeOau6AMVDDWE5HOAcmqw/Y=";
2023-04-25 03:42:44 +00:00
};
# NOTE(review): every hardening flag is switched off; the reason is not
# recorded in this file — confirm whether all of them are still required.
hardeningDisable = [ "all" ];
2023-08-05 00:58:03 +00:00
# Keep build-time tools (nativeBuildInputs) and target libraries
# (buildInputs) strictly separated.
strictDeps = true;
2023-04-25 03:42:44 +00:00
# Tools that run on the build machine during the build.
nativeBuildInputs = [
# autoAddDriverRunpath does not actually depend on or incur any dependency
# of cudaPackages. It merely adds an impure, non-Nix path to the RPATHs of
# executables that need to use CUDA at runtime.
2024-03-30 17:17:46 +00:00
autoAddDriverRunpath
2023-04-25 03:42:44 +00:00
cmake
2024-11-19 18:31:39 +00:00
ninja
2023-04-25 03:42:44 +00:00
git
python3
2023-08-05 00:58:03 +00:00
];
2023-04-25 03:42:44 +00:00
2023-08-05 00:58:03 +00:00
# Libraries DCGM is compiled against, grouped by how they are consumed.
buildInputs = [
# Header-only
boost
2023-04-25 03:42:44 +00:00
catch2
plog.dev
tclap_1_4
# Dependencies that can be either static or dynamic.
# Both follow the `static` argument of this package; yaml-cpp is additionally
# rebuilt with gcc11Stdenv, the same stdenv this derivation uses.
(fmt_9.override { enableShared = !static; }) # DCGM's build uses the static outputs regardless of enableShared
(yaml-cpp.override { inherit static; stdenv = gcc11Stdenv; })
# TODO: Dependencies that DCGM's CMake hard-codes to be static-only.
# These are therefore always built static here, independent of `static`.
(jsoncpp.override { enableStatic = true; })
(libevent.override { sslSupport = false; static = true; })
2023-04-25 03:42:44 +00:00
];
# Hand the per-CUDA-version include/lib paths to CMake so FindCuda.cmake can
# find them: one group of flags per entry of cudaPackageSets, flattened.
cmakeFlags = builtins.concatLists (map mkCudaFlags cudaPackageSets);
# The selected redist packages of every CUDA set, flattened into one list.
disallowedReferences = builtins.concatLists (map getCudaPackages cudaPackageSets);
2023-04-25 03:42:44 +00:00
# Package metadata. `lib` is referenced explicitly instead of being brought
# into scope with `with lib;`, so each name's origin is visible.
meta = {
  description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
  homepage = "https://developer.nvidia.com/dcgm";
  license = lib.licenses.asl20;
  maintainers = lib.teams.deshaw.members;
  mainProgram = "dcgmi";
  platforms = lib.platforms.linux;
};
}