nvidia_x11.dc_535: 535.129.03

This commit is contained in:
timothy 2023-12-15 18:20:42 +07:00
parent 987845b73e
commit 6e4d90f0b0
5 changed files with 64 additions and 24 deletions

View File

@ -47,7 +47,8 @@ in {
TRUNK_LINK_FAILURE_MODE=0;
NVSWITCH_FAILURE_MODE=0;
ABORT_CUDA_JOBS_ON_FM_EXIT=1;
TOPOLOGY_FILE_PATH=nvidia_x11.fabricmanager + "/share/nvidia-fabricmanager/nvidia/nvswitch";
TOPOLOGY_FILE_PATH="${nvidia_x11.fabricmanager}/share/nvidia-fabricmanager/nvidia/nvswitch";
DATABASE_PATH="${nvidia_x11.fabricmanager}/share/nvidia-fabricmanager/nvidia/nvswitch";
};
defaultText = lib.literalExpression ''
{
@ -69,7 +70,8 @@ in {
TRUNK_LINK_FAILURE_MODE=0;
NVSWITCH_FAILURE_MODE=0;
ABORT_CUDA_JOBS_ON_FM_EXIT=1;
TOPOLOGY_FILE_PATH=nvidia_x11.fabricmanager + "/share/nvidia-fabricmanager/nvidia/nvswitch";
TOPOLOGY_FILE_PATH="''${nvidia_x11.fabricmanager}/share/nvidia-fabricmanager/nvidia/nvswitch";
DATABASE_PATH="''${nvidia_x11.fabricmanager}/share/nvidia-fabricmanager/nvidia/nvswitch";
}
'';
description = lib.mdDoc ''
@ -584,24 +586,50 @@ in {
boot.extraModulePackages = [
nvidia_x11.bin
];
systemd.services.nvidia-fabricmanager = {
enable = true;
description = "Start NVIDIA NVLink Management";
wantedBy = [ "multi-user.target" ];
unitConfig.After = [ "network-online.target" ];
unitConfig.Requires = [ "network-online.target" ];
serviceConfig = {
Type = "forking";
TimeoutStartSec = 240;
ExecStart = let
nv-fab-conf = settingsFormat.generate "fabricmanager.conf" cfg.datacenter.settings;
in
nvidia_x11.fabricmanager + "/bin/nv-fabricmanager -c " + nv-fab-conf;
LimitCORE="infinity";
};
};
environment.systemPackages =
lib.optional cfg.datacenter.enable nvidia_x11.fabricmanager;
})
]);
systemd = {
tmpfiles.rules =
lib.optional (nvidia_x11.persistenced != null && config.virtualisation.docker.enableNvidia)
"L+ /run/nvidia-docker/extras/bin/nvidia-persistenced - - - - ${nvidia_x11.persistenced}/origBin/nvidia-persistenced";
services = lib.mkMerge [
({
nvidia-fabricmanager = {
enable = true;
description = "Start NVIDIA NVLink Management";
wantedBy = [ "multi-user.target" ];
unitConfig.After = [ "network-online.target" ];
unitConfig.Requires = [ "network-online.target" ];
serviceConfig = {
Type = "forking";
TimeoutStartSec = 240;
ExecStart = let
nv-fab-conf = settingsFormat.generate "fabricmanager.conf" cfg.datacenter.settings;
in
"${lib.getExe nvidia_x11.fabricmanager} -c ${nv-fab-conf}";
LimitCORE="infinity";
};
};
})
(lib.mkIf cfg.nvidiaPersistenced {
"nvidia-persistenced" = {
description = "NVIDIA Persistence Daemon";
wantedBy = ["multi-user.target"];
serviceConfig = {
Type = "forking";
Restart = "always";
PIDFile = "/var/run/nvidia-persistenced/nvidia-persistenced.pid";
ExecStart = "${lib.getExe nvidia_x11.persistenced} --verbose";
ExecStopPost = "${pkgs.coreutils}/bin/rm -rf /var/run/nvidia-persistenced";
};
};
})
];
};
environment.systemPackages =
lib.optional cfg.datacenter.enable nvidia_x11.fabricmanager
++ lib.optional cfg.nvidiaPersistenced nvidia_x11.persistenced;
})
]);
}

View File

@ -81,6 +81,17 @@ rec {
useFabricmanager = true;
};
dc_535 = generic rec {
version = "535.129.03";
url = "https://us.download.nvidia.com/tesla/${version}/NVIDIA-Linux-x86_64-${version}.run";
sha256_64bit = "sha256-5tylYmomCMa7KgRs/LfBrzOLnpYafdkKwJu4oSb/AC4=";
persistencedSha256 = "sha256-FRMqY5uAJzq3o+YdM2Mdjj8Df6/cuUUAnh52Ne4koME=";
fabricmanagerSha256 = "sha256-5KRYS+JLVAhDkBn8Z7e0uJvULQy6dSpwnYsbBxw7Mxg=";
useSettings = false;
usePersistenced = true;
useFabricmanager = true;
};
# Update note:
# If you add a legacy driver here, also update `top-level/linux-kernels.nix`,
# adding to the `nvidia_x11_legacy*` entries.

View File

@ -12,7 +12,7 @@ stdenv.mkDerivation rec {
pname = "fabricmanager";
version = fmver;
src = fetchurl {
url = "https://developer.download.nvidia.com/compute/cuda/redist/fabricmanager/" +
url = "https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/" +
"${sys}/${pname}-${sys}-${fmver}-archive.tar.xz";
inherit sha256;
};

View File

@ -63,7 +63,7 @@ assert versionOlder version "391" -> sha256_32bit != null;
assert useSettings -> settingsSha256 != null;
assert usePersistenced -> persistencedSha256 != null;
assert useFabricmanager -> fabricmanagerSha256 != null;
assert useFabricmanager -> !(useSettings || usePersistenced);
assert useFabricmanager -> !useSettings;
let
nameSuffix = optionalString (!libsOnly) "-${kernel.version}";

View File

@ -406,6 +406,7 @@ in {
nvidia_x11_vulkan_beta = nvidiaPackages.vulkan_beta;
nvidia_dc = nvidiaPackages.dc;
nvidia_dc_520 = nvidiaPackages.dc_520;
nvidia_dc_535 = nvidiaPackages.dc_535;
# this is not a replacement for nvidia_x11*
# only the opensource kernel driver exposed for hydra to build