diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md
index 29cac1dc45be..5f2c384473da 100644
--- a/nixos/doc/manual/release-notes/rl-2405.section.md
+++ b/nixos/doc/manual/release-notes/rl-2405.section.md
@@ -46,9 +46,7 @@ Use `services.pipewire.extraConfig` or `services.pipewire.configPackages` for Pi

 - The default dbus implementation has transitioned to dbus-broker from the classic dbus daemon for better performance and reliability. Users can revert to the classic dbus daemon by setting `services.dbus.implementation = "dbus";`. For detailed deviations, refer to [dbus-broker's deviations page](https://github.com/bus1/dbus-broker/wiki/Deviations).

-- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).
-
-- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `virtualisation.containers.cdi.dynamic.nvidia.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
+- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `hardware.nvidia-container-toolkit.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.

 - `system.etc.overlay.enable` option was added. If enabled, `/etc` is mounted via an overlayfs instead of being created by a custom perl script.

diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 511d991e919c..50157f62ed32 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -559,7 +559,7 @@
   ./services/hardware/kanata.nix
   ./services/hardware/lcd.nix
   ./services/hardware/lirc.nix
-  ./services/hardware/nvidia-container-toolkit-cdi-generator
+  ./services/hardware/nvidia-container-toolkit
   ./services/hardware/monado.nix
   ./services/hardware/nvidia-optimus.nix
   ./services/hardware/openrgb.nix
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
deleted file mode 100644
index 1aaa2d07b9bd..000000000000
--- a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  addDriverRunpath,
-  glibc,
-  jq,
-  lib,
-  nvidia-container-toolkit,
-  nvidia-driver,
-  runtimeShell,
-  writeScriptBin,
-}:
-let
-  mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; };
-  mounts = [
-    # FIXME: Making /usr mounts optional
-    { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
-      containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
-    { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
-      containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
-    { hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
-      containerPath = "/usr/bin/nvidia-debugdump"; }
-    { hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
-      containerPath = "/usr/bin/nvidia-powerd"; }
-    { hostPath = lib.getExe' nvidia-driver "nvidia-smi";
-      containerPath = "/usr/bin/nvidia-smi"; }
-    { hostPath = lib.getExe' nvidia-container-toolkit "nvidia-ctk";
-      containerPath = "/usr/bin/nvidia-ctk"; }
-    { hostPath = "${lib.getLib glibc}/lib";
-      containerPath = "${lib.getLib glibc}/lib"; }
-
-    # FIXME: use closureinfo
-    {
-      hostPath = addDriverRunpath.driverLink;
-      containerPath = addDriverRunpath.driverLink;
-    }
-    { hostPath = "${lib.getLib glibc}/lib";
-      containerPath = "${lib.getLib glibc}/lib"; }
-    { hostPath = "${lib.getLib glibc}/lib64";
-      containerPath = "${lib.getLib glibc}/lib64"; }
-  ];
-  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
-  mountsToJq = lib.concatMap
-    (mount:
-      ["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"])
-    mounts;
-in
-writeScriptBin "nvidia-cdi-generator"
-''
-#! ${runtimeShell}
-
-function cdiGenerate {
-  ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
-    --format json \
-    --ldconfig-path ${lib.getExe' glibc "ldconfig"} \
-    --library-search-path ${lib.getLib nvidia-driver}/lib \
-    --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
-}
-
-cdiGenerate | \
-  ${lib.concatStringsSep " | " mountsToJq} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
-''
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
deleted file mode 100644
index 5aa3c72ee0a0..000000000000
--- a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix
+++ /dev/null
@@ -1,40 +0,0 @@
-{ config, lib, pkgs, ... }:
-
-{
-
-  options = {
-
-    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption {
-      default = false;
-      internal = true;
-      visible = false;
-      type = lib.types.bool;
-      description = ''
-        Enable dynamic CDI configuration for NVidia devices by running
-        nvidia-container-toolkit on boot.
-      '';
-    };
-
-  };
-
-  config = {
-
-    systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable {
-      description = "Container Device Interface (CDI) for Nvidia generator";
-      wantedBy = [ "multi-user.target" ];
-      after = [ "systemd-udev-settle.service" ];
-      serviceConfig = {
-        RuntimeDirectory = "cdi";
-        RemainAfterExit = true;
-        ExecStart =
-          let
-            script = pkgs.callPackage ./cdi-generate.nix { nvidia-driver = config.hardware.nvidia.package; };
-          in
-          lib.getExe script;
-        Type = "oneshot";
-      };
-    };
-
-  };
-
-}
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
new file mode 100644
index 000000000000..ca769cc44e5c
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix
@@ -0,0 +1,45 @@
+# Builds the `nvidia-cdi-generator` script: it runs `nvidia-ctk cdi generate`
+# and appends every entry of `mounts` to the spec's `.containerEdits.mounts`
+# before writing it to $RUNTIME_DIRECTORY/nvidia-container-toolkit.json.
+{
+  glibc,
+  jq,
+  lib,
+  mounts,
+  nvidia-container-toolkit,
+  nvidia-driver,
+  runtimeShell,
+  writeScriptBin,
+}: let
+  # Translate a module-level mount ({hostPath, containerPath, mountOptions})
+  # into the {hostPath, containerPath, options} shape written into the spec.
+  mkMount = {hostPath, containerPath, mountOptions}: {
+    inherit hostPath containerPath;
+    options = mountOptions;
+  };
+  # Updating the element at index `length` appends one entry to the array.
+  jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
+  # One jq invocation per mount; escapeShellArg keeps the filter a single
+  # shell word even if a path contains a single quote.
+  allJqMounts = map
+    (mount:
+      "${lib.getExe jq} ${lib.escapeShellArg "${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}"}")
+    mounts;
+  # Joining from a list that always starts with `cdiGenerate` avoids a
+  # dangling `|` (a shell syntax error) when `mounts` is empty.
+  pipeline = lib.concatStringsSep " | " ([ "cdiGenerate" ] ++ allJqMounts);
+in
+writeScriptBin "nvidia-cdi-generator"
+''
+#! ${runtimeShell}
+
+function cdiGenerate {
+  ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \
+    --format json \
+    --ldconfig-path ${lib.getExe' glibc "ldconfig"} \
+    --library-search-path ${lib.getLib nvidia-driver}/lib \
+    --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"}
+}
+
+${pipeline} > "$RUNTIME_DIRECTORY/nvidia-container-toolkit.json"
+''
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
new file mode 100644
index 000000000000..7b4973d3c6b0
--- /dev/null
+++ b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix
@@ -0,0 +1,130 @@
+# NixOS module for the NVIDIA Container Toolkit: declares the
+# `hardware.nvidia-container-toolkit` options and a oneshot service that
+# writes a CDI specification with the configured mounts appended.
+{ config, lib, pkgs, ... }:
+
+{
+  imports = [
+    # Forward the option's previous path to its new location.
+    (lib.mkRenamedOptionModule
+      [ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
+      [ "hardware" "nvidia-container-toolkit" "enable" ])
+  ];
+
+  options = let
+    # Submodule describing one mount appended to the generated CDI spec.
+    mountType = {
+      options = {
+        hostPath = lib.mkOption {
+          type = lib.types.str;
+          description = "Host path.";
+        };
+        containerPath = lib.mkOption {
+          type = lib.types.str;
+          description = "Container path.";
+        };
+        mountOptions = lib.mkOption {
+          default = [ "ro" "nosuid" "nodev" "bind" ];
+          type = lib.types.listOf lib.types.str;
+          description = "Mount options.";
+        };
+      };
+    };
+  in {
+
+    hardware.nvidia-container-toolkit = {
+      enable = lib.mkOption {
+        default = false;
+        type = lib.types.bool;
+        description = ''
+          Enable dynamic CDI configuration for NVidia devices by running
+          nvidia-container-toolkit on boot.
+        '';
+      };
+
+      mounts = lib.mkOption {
+        type = lib.types.listOf (lib.types.submodule mountType);
+        default = [];
+        description = "Mounts to be added to every container under the Nvidia CDI profile.";
+      };
+
+      mount-nvidia-executables = lib.mkOption {
+        default = true;
+        type = lib.types.bool;
+        description = ''
+          Mount executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
+          nvidia-debugdump and nvidia-powerd on containers.
+        '';
+      };
+
+      mount-nvidia-docker-1-directories = lib.mkOption {
+        default = true;
+        type = lib.types.bool;
+        description = ''
+          Mount nvidia-docker-1 directories on containers: /usr/local/nvidia/lib and
+          /usr/local/nvidia/lib64.
+        '';
+      };
+    };
+
+  };
+
+  config = {
+
+    # Default mounts; user-defined `mounts` entries are merged with these.
+    hardware.nvidia-container-toolkit.mounts = let
+      nvidia-driver = config.hardware.nvidia.package;
+    in (lib.mkMerge [
+      # Always present: the driver link and glibc's library directories.
+      [{ hostPath = pkgs.addDriverRunpath.driverLink;
+         containerPath = pkgs.addDriverRunpath.driverLink; }
+       { hostPath = "${lib.getLib pkgs.glibc}/lib";
+         containerPath = "${lib.getLib pkgs.glibc}/lib"; }
+       { hostPath = "${lib.getLib pkgs.glibc}/lib64";
+         containerPath = "${lib.getLib pkgs.glibc}/lib64"; }]
+      (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables
+        [{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
+           containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
+         { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
+           containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
+         { hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
+           containerPath = "/usr/bin/nvidia-debugdump"; }
+         { hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
+           containerPath = "/usr/bin/nvidia-powerd"; }
+         { hostPath = lib.getExe' nvidia-driver "nvidia-smi";
+           containerPath = "/usr/bin/nvidia-smi"; }])
+      # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
+      # e.g.
+      # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
+      # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
+      # Both container paths are backed by the driver's single lib directory.
+      (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories
+        [{ hostPath = "${lib.getLib nvidia-driver}/lib";
+           containerPath = "/usr/local/nvidia/lib"; }
+         { hostPath = "${lib.getLib nvidia-driver}/lib";
+           containerPath = "/usr/local/nvidia/lib64"; }])
+    ]);
+
+    systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
+      description = "Container Device Interface (CDI) for Nvidia generator";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "systemd-udev-settle.service" ];
+      serviceConfig = {
+        # The generator script writes its output into $RUNTIME_DIRECTORY.
+        RuntimeDirectory = "cdi";
+        RemainAfterExit = true;
+        ExecStart =
+          let
+            script = pkgs.callPackage ./cdi-generate.nix {
+              inherit (config.hardware.nvidia-container-toolkit) mounts;
+              nvidia-driver = config.hardware.nvidia.package;
+            };
+          in
+          lib.getExe script;
+        Type = "oneshot";
+      };
+    };
+
+  };
+
+}
diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix
index d72695530786..65620dd3935b 100644
--- a/nixos/modules/virtualisation/containers.nix
+++ b/nixos/modules/virtualisation/containers.nix
@@ -28,43 +28,6 @@ in
       description = "Enable the OCI seccomp BPF hook";
     };

-    cdi = {
-      dynamic.nvidia.enable = mkOption {
-        type = types.bool;
-        default = false;
-        description = ''
-          Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
-        '';
-      };
-
-      static = mkOption {
-        type = types.attrs;
-        default = { };
-        description = ''
-          Declarative CDI specification. Each key of the attribute set
-          will be mapped to a file in /etc/cdi. It is required for every
-          key to be provided in JSON format.
-        '';
-        example = {
-          some-vendor = builtins.fromJSON ''
-              {
-                "cdiVersion": "0.5.0",
-                "kind": "some-vendor.com/foo",
-                "devices": [],
-                "containerEdits": []
-              }
-            '';
-
-          some-other-vendor = {
-            cdiVersion = "0.5.0";
-            kind = "some-other-vendor.com/bar";
-            devices = [];
-            containerEdits = [];
-          };
-        };
-      };
-    };
-
     containersConf.settings = mkOption {
       type = toml.type;
       default = { };
@@ -150,8 +113,6 @@ in

   config = lib.mkIf cfg.enable {

-    hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
-
     virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];

     virtualisation.containers.containersConf.settings = {
@@ -163,13 +124,7 @@ in
       };
     };

-    environment.etc = let
-      cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
-        (name: value:
-          lib.attrsets.nameValuePair "cdi/${name}.json"
-            { text = builtins.toJSON value; })
-        cfg.cdi.static);
-    in {
+    environment.etc = {
       "containers/containers.conf".source =
         toml.generate "containers.conf" cfg.containersConf.settings;

@@ -183,7 +138,7 @@ in
       "containers/policy.json".source =
         if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
         else "${pkgs.skopeo.policy}/default-policy.json";
-    } // cdiStaticConfigurationFiles;
+    };

   };
