dcgm: 3.3.5 -> 3.3.9

Fixes the build and matches upstream in dropping CUDA 10. Diff: https://github.com/NVIDIA/DCGM/compare/refs/tags/v3.3.5...v3.3.9
2025-02-03 02:33:15 +00:00 · 2024-11-19 18:31:39 +00:00 · 2024-11-19 18:31:39 +00:00 · e75510817a
commit e75510817a
parent 082273f5bb
1 changed file with 29 additions and 45 deletions
--- a/pkgs/by-name/dc/dcgm/package.nix
+++ b/pkgs/by-name/dc/dcgm/package.nix
@@ -5,9 +5,9 @@
 , catch2
 , cmake
 , ninja
-, cudaPackages_10_2
 , cudaPackages_11_8
 , cudaPackages_12
+, boost
 , fmt_9
 , git
 , jsoncpp
@@ -21,26 +21,12 @@
 , static ? gcc11Stdenv.hostPlatform.isStatic
 }:
 let
-  # DCGM depends on 3 different versions of CUDA at the same time.
-  # The runtime closure, thankfully, is quite small because most things
-  # are statically linked.
-  cudaPackageSetByVersion = [
-    {
-      version = "10";
-      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
-      pkgSet = [
-        cudaPackages_10_2.cudatoolkit
-        cudaPackages_10_2.cudatoolkit.lib
-      ];
-    }
-    {
-      version = "11";
-      pkgSet = getCudaPackages cudaPackages_11_8;
-    }
-    {
-      version = "12";
-      pkgSet = getCudaPackages cudaPackages_12;
-    }
+  # DCGM depends on 2 different versions of CUDA at the same time.
+  # The runtime closure, thankfully, is quite small as it does not
+  # include the CUDA libraries.
+  cudaPackageSets = [
+    cudaPackages_11_8
+    cudaPackages_12
  ];

  # Select needed redist packages from cudaPackages
@@ -55,45 +41,39 @@ let
    libcurand
  ];

-  # Builds CMake code to add CUDA paths for include and lib.
-  mkAppendCudaPaths = { version, pkgSet }:
+  # Builds CMake flags to add CUDA paths for include and lib.
+  mkCudaFlags = cudaPackages:
    let
+      version = cudaPackages.cudaMajorVersion;
      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
      # combine everything together for headers to work.
-      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
-      # of each package, but not all of them have an include directory and making that work
-      # is more effort than it's worth for this temporary, build-time package.
-      combined = symlinkJoin {
-        name = "cuda-combined-${version}";
-        paths = pkgSet;
+      headers = symlinkJoin {
+        name = "cuda-headers-combined-${version}";
+        paths = lib.map (pkg: "${lib.getInclude pkg}/include") (getCudaPackages cudaPackages);
      };
-      # The combined package above breaks the build for some reason so we just configure
-      # each package's library path.
-      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
-    in ''
-      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
-      list(APPEND Cuda${version}_LIB_PATHS ${libs})
-    '';
+    in [
+      (lib.cmakeFeature "CUDA${version}_INCLUDE_DIR" "${headers}")
+      (lib.cmakeFeature "CUDA${version}_LIBS" "${cudaPackages.cuda_cudart.stubs}/lib/stubs/libcuda.so")
+      (lib.cmakeFeature "CUDA${version}_STATIC_LIBS" "${lib.getLib cudaPackages.cuda_cudart}/lib/libcudart.so")
+      (lib.cmakeFeature "CUDA${version}_STATIC_CUBLAS_LIBS" (lib.concatStringsSep ";" [
+        "${lib.getLib cudaPackages.libcublas}/lib/libcublas.so"
+        "${lib.getLib cudaPackages.libcublas}/lib/libcublasLt.so"
+      ]))
+    ];

 # gcc11 is required by DCGM's very particular build system
 # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
 in gcc11Stdenv.mkDerivation rec {
  pname = "dcgm";
-  version = "3.3.5"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.
+  version = "3.3.9"; # N.B: If you change this, be sure prometheus-dcgm-exporter supports this version.

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${version}";
-    hash = "sha256-n/uWvgvxAGfr1X51XgtHfFGDOO5AMBSV5UWQQpsylpg=";
+    hash = "sha256-PysxuN5WT7GB0oOvT5ezYeOau6AMVDDWE5HOAcmqw/Y=";
  };

-  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
-  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
-  prePatch = ''
-    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
-  '';
-
  hardeningDisable = [ "all" ];

  strictDeps = true;
@@ -112,6 +92,7 @@ in gcc11Stdenv.mkDerivation rec {

  buildInputs = [
    # Header-only
+    boost
    catch2
    plog.dev
    tclap_1_4
@@ -125,7 +106,10 @@ in gcc11Stdenv.mkDerivation rec {
    (libevent.override { sslSupport = false; static = true; })
  ];

-  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;
+  # Add our paths to the CMake flags so FindCuda.cmake can find them.
+  cmakeFlags = lib.concatMap mkCudaFlags cudaPackageSets;
+
+  disallowedReferences = lib.concatMap getCudaPackages cudaPackageSets;

  meta = with lib; {
    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";