nvidia-container-toolkit: only mount existing paths in the host (#319772)

This commit is contained in:
Someone 2024-08-27 11:18:18 +00:00 committed by GitHub
commit 4faf4ca9af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 175 additions and 7 deletions

View File

@ -13,11 +13,14 @@
inherit hostPath containerPath;
options = mountOptions;
};
# jq filter prefix: addresses the slot one past the end of
# `.containerEdits.mounts` and updates it with whatever object is appended
# after the trailing `+`.
jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
# One complete jq command line per configured mount.
allJqMounts = map
  (mount:
    "${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'")
  mounts;
# Render the shell invocation of `additionalMount` for a single mount.
# All three arguments go through lib.escapeShellArgs so that paths or JSON
# containing quotes, spaces, `$` or backticks reach the function verbatim
# instead of being re-interpreted by the shell.
mountToCommand = mount:
  "additionalMount ${lib.escapeShellArgs [
    mount.hostPath
    mount.containerPath
    (builtins.toJSON mount.mountOptions)
  ]}";
# Join one `additionalMount` stage per mount into a shell pipeline, each stage
# on its own continuation line. Without any mounts the pipeline degenerates to
# a plain `cat`.
mountsToCommands = mounts:
  let
    pipelineSeparator = " | \\\n";
  in
  if builtins.length mounts > 0 then
    lib.concatMapStringsSep pipelineSeparator mountToCommand mounts
  else
    "cat";
in
writeScriptBin "nvidia-cdi-generator"
''
@ -32,6 +35,18 @@ function cdiGenerate {
--nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
}
# Pipe the generated spec through one jq stage per extra mount. With no mounts
# fall back to cat, otherwise the pipeline would end in a bare redirection and
# write an empty spec. The target path is quoted against word splitting.
cdiGenerate | \
  ${if allJqMounts == [ ] then "cat" else lib.concatStringsSep " | " allJqMounts} > "$RUNTIME_DIRECTORY/nvidia-container-toolkit.json"
# Pipeline stage: reads a CDI spec on stdin and writes it to stdout.
#   $1  host path to mount
#   $2  path inside the container
#   $3  mount options as a JSON value
# When the host path exists, a mount entry is appended to
# .containerEdits.mounts; otherwise a note goes to stderr and the spec is
# passed through unchanged.
function additionalMount {
  local hostPath="$1"
  local containerPath="$2"
  local mountOptions="$3"
  if [ -e "$hostPath" ]; then
    # Values are handed to jq as variables rather than spliced into the
    # filter text, so quotes or backslashes in a path cannot corrupt it.
    ${lib.getExe jq} \
      --arg hostPath "$hostPath" \
      --arg containerPath "$containerPath" \
      --argjson options "$mountOptions" \
      '.containerEdits.mounts += [{ hostPath: $hostPath, containerPath: $containerPath, options: $options }]'
  else
    echo "Mount $hostPath ignored: could not find path in the host machine" >&2
    cat
  fi
}
# Feed the generated spec through the additionalMount stages (or cat when no
# mounts are configured) and store the result under $RUNTIME_DIRECTORY.
# The target path is quoted so a directory containing spaces cannot split it.
cdiGenerate |
  ${mountsToCommands mounts} > "$RUNTIME_DIRECTORY/nvidia-container-toolkit.json"
''

View File

@ -71,6 +71,8 @@
/usr/local/nvidia/lib64.
'';
};
package = lib.mkPackageOption pkgs "nvidia-container-toolkit" { };
};
};
@ -129,6 +131,7 @@
let
# Instantiate ./cdi-generate.nix with the module's mounts, package and device
# naming strategy, plus the configured NVIDIA driver package.
script =
  let
    toolkitCfg = config.hardware.nvidia-container-toolkit;
  in
  pkgs.callPackage ./cdi-generate.nix {
    mounts = toolkitCfg.mounts;
    nvidia-container-toolkit = toolkitCfg.package;
    nvidia-driver = config.hardware.nvidia.package;
    deviceNameStrategy = toolkitCfg.device-name-strategy;
  };

View File

@ -705,6 +705,7 @@ in {
ntfy-sh = handleTest ./ntfy-sh.nix {};
ntfy-sh-migration = handleTest ./ntfy-sh-migration.nix {};
ntpd-rs = handleTest ./ntpd-rs.nix {};
nvidia-container-toolkit = runTest ./nvidia-container-toolkit.nix;
nvmetcfg = handleTest ./nvmetcfg.nix {};
nzbget = handleTest ./nzbget.nix {};
nzbhydra2 = handleTest ./nzbhydra2.nix {};

View File

@ -0,0 +1,149 @@
{ pkgs, lib, ... }:
let
# Script used as the command of the test container image. It walks /usr/bin,
# the driver link directory and /usr/local/nvidia, and exits non-zero if any
# ELF file found there has a dependency that ldd reports as "not found".
testCDIScript = pkgs.writeShellScriptBin "test-cdi" ''
  # Returns 1 when ldd prints "not found" for the file in $1, 0 otherwise.
  check_file_referential_integrity() {
    echo "checking $1 referential integrity"
    ( ${pkgs.glibc.bin}/bin/ldd "$1" | ${lib.getExe pkgs.gnugrep} "not found" &> /dev/null ) && return 1
    return 0
  }

  # Checks every regular ELF file below $1; other files are reported as skipped.
  check_directory_referential_integrity() {
    # IFS= and -r keep file names with surrounding whitespace or backslashes
    # intact while reading the NUL-delimited list produced by find.
    ${lib.getExe pkgs.findutils} "$1" -type f -print0 | while IFS= read -r -d $'\0' file; do
      if [[ $(${lib.getExe pkgs.file} "$file" | ${lib.getExe pkgs.gnugrep} ELF) ]]; then
        # The loop is the last stage of a pipeline, so this exit sets the
        # status of the function instead of terminating the whole script.
        check_file_referential_integrity "$file" || exit 1
      else
        echo "skipping $file: not an ELF file"
      fi
    done
  }

  check_directory_referential_integrity "/usr/bin" || exit 1
  check_directory_referential_integrity "${pkgs.addDriverRunpath.driverLink}" || exit 1
  check_directory_referential_integrity "/usr/local/nvidia" || exit 1
'';
# Image "cdi-test:latest": runs the test-cdi script by default and copies
# dockerTools' usrBinEnv and binSh into the image root.
testContainerImage = pkgs.dockerTools.buildImage {
  name = "cdi-test";
  tag = "latest";
  copyToRoot = [
    pkgs.dockerTools.usrBinEnv
    pkgs.dockerTools.binSh
  ];
  config.Cmd = [ (lib.getExe testCDIScript) ];
};
# Static CDI spec (JSON) of kind "nvidia.com/gpu": a single device named "all"
# whose only device node is /dev/urandom, with no hooks and no mounts at either
# the device or the top level. The dummy nvidia-ctk package defined in this
# file installs it as spec.json and prints it when invoked.
emptyCDISpec = ''
{
"cdiVersion": "0.5.0",
"kind": "nvidia.com/gpu",
"devices": [
{
"name": "all",
"containerEdits": {
"deviceNodes": [
{
"path": "/dev/urandom"
}
],
"hooks": [],
"mounts": []
}
}
],
"containerEdits": {
"deviceNodes": [],
"hooks": [],
"mounts": []
}
}
'';
# Value for hardware.nvidia-container-toolkit used by `defaults`: enables the
# module and replaces the real package with a dummy whose nvidia-ctk ignores
# its arguments and prints the static spec from emptyCDISpec.
nvidia-container-toolkit = {
  enable = true;
  package = pkgs.stdenv.mkDerivation {
    pname = "nvidia-ctk-dummy";
    version = "1.0.0";
    dontUnpack = true;
    dontBuild = true;
    inherit emptyCDISpec;
    # With passAsFile the builder receives $emptyCDISpecPath (a file holding
    # the spec) rather than the spec text in $emptyCDISpec.
    passAsFile = [ "emptyCDISpec" ];
    installPhase = ''
      mkdir -p $out/bin $out/share/nvidia-container-toolkit
      cp "$emptyCDISpecPath" "$out/share/nvidia-container-toolkit/spec.json"
      # Unquoted heredoc delimiter: $out is expanded at build time, so the
      # wrapper hard-codes the store path of spec.json.
      cat << EOF > "$out/bin/nvidia-ctk"
      #!${pkgs.runtimeShell}
      cat "$out/share/nvidia-container-toolkit/spec.json"
      EOF
      chmod +x $out/bin/nvidia-ctk
    '';
    meta.mainProgram = "nvidia-ctk";
  };
};
in
{
# Test identity and ownership.
name = "nvidia-container-toolkit";
meta.maintainers = [ lib.maintainers.ereslibre ];
# Baseline module shared by the test nodes: jq on the system path, a 10240
# disk size, containers and graphics enabled by default, the dummy toolkit
# package, and the open NVIDIA driver from the node's kernel package set.
defaults =
  { config, ... }:
  {
    environment.systemPackages = [ pkgs.jq ];

    virtualisation = {
      diskSize = lib.mkDefault 10240;
      containers.enable = lib.mkDefault true;
    };

    hardware.nvidia-container-toolkit = nvidia-container-toolkit;
    hardware.graphics.enable = lib.mkDefault true;
    hardware.nvidia = {
      open = true;
      package = config.boot.kernelPackages.nvidiaPackages.stable.open;
    };
  };
nodes = {
  # Containers and graphics switched off.
  no-gpus = {
    virtualisation.containers.enable = false;
    hardware.graphics.enable = false;
  };

  # Graphics on, with podman available to run the test image.
  one-gpu =
    { pkgs, ... }:
    {
      hardware.graphics.enable = true;
      environment.systemPackages = [ pkgs.podman ];
    };

  # Requests a mount with the host path that the test script greps for.
  one-gpu-invalid-host-paths.hardware.nvidia-container-toolkit.mounts = [
    {
      hostPath = "/non-existant-path";
      containerPath = "/some/path";
    }
  ];
};
# Python driver script: waits for the CDI generator unit on each node, checks
# that the generated spec parses with jq, runs the test image under podman, and
# verifies that a mount with a missing host path is left out of the spec.
testScript = ''
  start_all()

  with subtest("Generate an empty CDI spec for a machine with no Nvidia GPUs"):
      no_gpus.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
      no_gpus.succeed("cat /var/run/cdi/nvidia-container-toolkit.json | jq")

  with subtest("Podman loads the generated CDI spec for a machine with an Nvidia GPU"):
      one_gpu.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
      one_gpu.succeed("cat /var/run/cdi/nvidia-container-toolkit.json | jq")
      one_gpu.succeed("podman load < ${testContainerImage}")
      print(one_gpu.succeed("podman run --pull=never --device=nvidia.com/gpu=all -v /run/opengl-driver:/run/opengl-driver:ro cdi-test:latest"))

  # Issue: https://github.com/NixOS/nixpkgs/issues/319201
  with subtest("The generated CDI spec skips specified non-existant paths in the host"):
      one_gpu_invalid_host_paths.wait_for_unit("nvidia-container-toolkit-cdi-generator.service")
      # Check that the spec exists and parses first: grep also exits non-zero
      # on a missing file, which would let the negative check pass vacuously.
      one_gpu_invalid_host_paths.succeed("cat /var/run/cdi/nvidia-container-toolkit.json | jq")
      one_gpu_invalid_host_paths.fail("grep 'non-existant-path' /var/run/cdi/nvidia-container-toolkit.json")
'';
}