nixos/modules/virtualisation: additional configuration options (#349537)

oci-containers: additional configuration options
This commit is contained in:
Yethal 2024-12-05 18:48:41 +01:00 committed by GitHub
parent 51e98f6c79
commit 04bf3d8774
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 297 additions and 154 deletions

View File

@ -1,4 +1,10 @@
{ config, options, lib, pkgs, ... }:
{
config,
options,
lib,
pkgs,
...
}:
with lib;
let
@ -8,7 +14,8 @@ let
defaultBackend = options.virtualisation.oci-containers.backend.default;
containerOptions =
{ ... }: {
{ ... }:
{
options = {
@ -77,8 +84,8 @@ let
};
cmd = mkOption {
type = with types; listOf str;
default = [];
type = with types; listOf str;
default = [ ];
description = "Commandline arguments to pass to the image's entrypoint.";
example = literalExpression ''
["--port=9000"]
@ -87,7 +94,7 @@ let
labels = mkOption {
type = with types; attrsOf str;
default = {};
default = { };
description = "Labels to attach to the container at runtime.";
example = literalExpression ''
{
@ -105,26 +112,26 @@ let
environment = mkOption {
type = with types; attrsOf str;
default = {};
default = { };
description = "Environment variables to set for this container.";
example = literalExpression ''
{
DATABASE_HOST = "db.example.com";
DATABASE_PORT = "3306";
}
'';
'';
};
environmentFiles = mkOption {
type = with types; listOf path;
default = [];
default = [ ];
description = "Environment files for this container.";
example = literalExpression ''
[
/path/to/.env
/path/to/.env.secret
]
'';
'';
};
log-driver = mkOption {
@ -147,7 +154,7 @@ let
ports = mkOption {
type = with types; listOf str;
default = [];
default = [ ];
description = ''
Network ports to publish from the container to the outer host.
@ -194,7 +201,7 @@ let
volumes = mkOption {
type = with types; listOf str;
default = [];
default = [ ];
description = ''
List of volumes to attach to this container.
@ -222,7 +229,7 @@ let
dependsOn = mkOption {
type = with types; listOf str;
default = [];
default = [ ];
description = ''
Define which other containers this one depends on. They will be added to both After and Requires for the unit.
@ -247,14 +254,17 @@ let
preRunExtraOptions = mkOption {
type = with types; listOf str;
default = [];
default = [ ];
description = "Extra options for {command}`${defaultBackend}` that go before the `run` argument.";
example = [ "--runtime" "runsc" ];
example = [
"--runtime"
"runsc"
];
};
extraOptions = mkOption {
type = with types; listOf str;
default = [];
default = [ ];
description = "Extra options for {command}`${defaultBackend} run`.";
example = literalExpression ''
["--network=host"]
@ -262,177 +272,293 @@ let
};
autoStart = mkOption {
type = types.bool;
type = with types; bool;
default = true;
description = ''
When enabled, the container is automatically started on boot.
If this option is set to false, the container has to be started on-demand via its service.
'';
};
pull = mkOption {
  # The selected policy is handed to the backend's `run` command as `--pull <policy>`.
  type =
    with types;
    enum [
      "always"
      "missing"
      "never"
      "newer"
    ];
  default = "missing";
  description = ''
    Image pull policy for the container. Must be one of: always, missing, never, newer.

    Note that `newer` is only understood by the podman backend;
    `docker run --pull` accepts only always, missing and never.
  '';
};
capAdd = mkOption {
  # Attribute names are capability names. Only entries whose value is `true`
  # are turned into `--cap-add=<name>` flags; `false` and `null` entries are ignored.
  type = with types; lazyAttrsOf (nullOr bool);
  default = { };
  description = ''
    Capabilities to add to container
  '';
  example = literalExpression ''
    {
      SYS_ADMIN = true;
    }
  '';
};
capDrop = mkOption {
  # Attribute names are capability names. Only entries whose value is `true`
  # are turned into `--cap-drop=<name>` flags; `false` and `null` entries are ignored.
  type = with types; lazyAttrsOf (nullOr bool);
  default = { };
  description = ''
    Capabilities to drop from container
  '';
  example = literalExpression ''
    {
      SYS_ADMIN = true;
    }
  '';
};
devices = mkOption {
  # Every list entry becomes one `--device=<entry>` flag on the backend's `run` command.
  description = ''
    List of devices to attach to this container.
  '';
  type = types.listOf types.str;
  default = [ ];
  example = literalExpression ''
    [
      "/dev/dri:/dev/dri"
    ]
  '';
};
privileged = mkOption {
  # When enabled, `--privileged` is added to the backend's `run` command.
  description = ''
    Give extended privileges to the container
  '';
  type = types.bool;
  default = false;
};
networks = mkOption {
  # Every list entry becomes one `--network=<entry>` flag on the backend's `run` command.
  description = ''
    Networks to attach the container to
  '';
  type = types.listOf types.str;
  default = [ ];
};
};
};
isValidLogin = login: login.username != null && login.passwordFile != null && login.registry != null;
# A login is usable only when username, passwordFile and registry are all set.
isValidLogin =
  login: !(login.username == null || login.passwordFile == null || login.registry == null);
mkService = name: container: let
dependsOn = map (x: "${cfg.backend}-${x}.service") container.dependsOn;
escapedName = escapeShellArg name;
preStartScript = pkgs.writeShellApplication {
name = "pre-start";
runtimeInputs = [ ];
text = ''
${cfg.backend} rm -f ${name} || true
${optionalString (isValidLogin container.login) ''
# try logging in, if it fails, check if image exists locally
${cfg.backend} login \
${container.login.registry} \
--username ${container.login.username} \
--password-stdin < ${container.login.passwordFile} \
|| ${cfg.backend} image inspect ${container.image} >/dev/null \
|| { echo "image doesn't exist locally and login failed" >&2 ; exit 1; }
''}
${optionalString (container.imageFile != null) ''
${cfg.backend} load -i ${container.imageFile}
''}
${optionalString (container.imageStream != null) ''
${container.imageStream} | ${cfg.backend} load
''}
${optionalString (cfg.backend == "podman") ''
rm -f /run/podman-${escapedName}.ctr-id
''}
'';
# Build the systemd service definition for a single container.
#
#   name      - attribute name of the container; used as the container name
#               and in the podman cidfile path.
#   container - the evaluated per-container option set.
#
# The result is used as a value of `systemd.services`.
mkService =
  name: container:
  let
    dependsOn = map (x: "${cfg.backend}-${x}.service") container.dependsOn;
    escapedName = escapeShellArg name;
    # Without imageFile or imageStream the image has to come from a registry,
    # so the unit must wait for the network.
    needsNetwork = container.imageFile == null && container.imageStream == null;
    preStartScript = pkgs.writeShellApplication {
      name = "pre-start";
      runtimeInputs = [ ];
      # All user-supplied values are shell-escaped before being interpolated.
      text = ''
        ${cfg.backend} rm -f ${escapedName} || true
        ${optionalString (isValidLogin container.login) ''
          # try logging in, if it fails, check if image exists locally
          ${cfg.backend} login \
          ${escapeShellArg container.login.registry} \
          --username ${escapeShellArg container.login.username} \
          --password-stdin < ${escapeShellArg container.login.passwordFile} \
          || ${cfg.backend} image inspect ${escapeShellArg container.image} >/dev/null \
          || { echo "image doesn't exist locally and login failed" >&2 ; exit 1; }
        ''}
        ${optionalString (container.imageFile != null) ''
          ${cfg.backend} load -i ${container.imageFile}
        ''}
        ${optionalString (container.imageStream != null) ''
          ${container.imageStream} | ${cfg.backend} load
        ''}
        ${optionalString (cfg.backend == "podman") ''
          rm -f /run/podman-${escapedName}.ctr-id
        ''}
      '';
    };
  in
  {
    wantedBy = optional container.autoStart "multi-user.target";
    wants = lib.optional needsNetwork "network-online.target";
    after =
      lib.optionals (cfg.backend == "docker") [
        "docker.service"
        "docker.socket"
      ]
      # if imageFile or imageStream is not set, the service needs the network to download the image from the registry
      ++ lib.optionals needsNetwork [
        "network-online.target"
      ]
      ++ dependsOn;
    requires = dependsOn;
    environment = proxy_env;
    path =
      if cfg.backend == "docker" then
        [ config.virtualisation.docker.package ]
      else if cfg.backend == "podman" then
        [ config.virtualisation.podman.package ]
      else
        throw "Unhandled backend: ${cfg.backend}";
    # One argument (group) per list element, joined with line continuations.
    script = concatStringsSep " \\\n  " (
      [
        "exec ${cfg.backend} "
      ]
      ++ map escapeShellArg container.preRunExtraOptions
      ++ [
        "run"
        "--rm"
        "--name=${escapedName}"
        "--log-driver=${container.log-driver}"
      ]
      ++ optional (container.entrypoint != null) "--entrypoint=${escapeShellArg container.entrypoint}"
      ++ optional (container.hostname != null) "--hostname=${escapeShellArg container.hostname}"
      ++ lib.optionals (cfg.backend == "podman") [
        "--cidfile=/run/podman-${escapedName}.ctr-id"
        "--cgroups=no-conmon"
        "--sdnotify=conmon"
        "-d"
        "--replace"
      ]
      ++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment)
      ++ map (f: "--env-file ${escapeShellArg f}") container.environmentFiles
      ++ map (p: "-p ${escapeShellArg p}") container.ports
      ++ optional (container.user != null) "-u ${escapeShellArg container.user}"
      ++ map (v: "-v ${escapeShellArg v}") container.volumes
      ++ (mapAttrsToList (k: v: "-l ${escapeShellArg k}=${escapeShellArg v}") container.labels)
      ++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}"
      ++ optional (container.privileged) "--privileged"
      # capAdd/capDrop values may be null or false; only explicit `true` emits a flag.
      ++ mapAttrsToList (k: _: "--cap-add=${escapeShellArg k}") (
        filterAttrs (_: v: v == true) container.capAdd
      )
      ++ mapAttrsToList (k: _: "--cap-drop=${escapeShellArg k}") (
        filterAttrs (_: v: v == true) container.capDrop
      )
      ++ map (d: "--device=${escapeShellArg d}") container.devices
      ++ map (n: "--network=${escapeShellArg n}") container.networks
      ++ [ "--pull ${escapeShellArg container.pull}" ]
      ++ map escapeShellArg container.extraOptions
      ++ [ (escapeShellArg container.image) ]
      ++ map escapeShellArg container.cmd
    );

    preStop =
      if cfg.backend == "podman" then
        "podman stop --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
      else
        "${cfg.backend} stop ${escapedName} || true";

    postStop =
      if cfg.backend == "podman" then
        "podman rm -f --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
      else
        "${cfg.backend} rm -f ${escapedName} || true";

    serviceConfig =
      {
        ### There is no generalized way of supporting `reload` for docker
        ### containers. Some containers may respond well to SIGHUP sent to their
        ### init process, but it is not guaranteed; some apps have other reload
        ### mechanisms, some don't have a reload signal at all, and some docker
        ### images just have broken signal handling.  The best compromise in this
        ### case is probably to leave ExecReload undefined, so `systemctl reload`
        ### will at least result in an error instead of potentially undefined
        ### behaviour.
        ###
        ### Advanced users can still override this part of the unit to implement
        ### a custom reload handler, since the result of all this is a normal
        ### systemd service from the perspective of the NixOS module system.
        ###
        # ExecReload = ...;
        ###
        ExecStartPre = [ "${preStartScript}/bin/pre-start" ];
        TimeoutStartSec = 0;
        TimeoutStopSec = 120;
        Restart = "always";
      }
      // optionalAttrs (cfg.backend == "podman") {
        # This value is read by systemd, not by a shell, so the raw name is used.
        Environment = "PODMAN_SYSTEMD_UNIT=podman-${name}.service";
        Type = "notify";
        NotifyAccess = "all";
      };
  };
in {
wantedBy = [] ++ optional (container.autoStart) "multi-user.target";
wants = lib.optional (container.imageFile == null && container.imageStream == null) "network-online.target";
after = lib.optionals (cfg.backend == "docker") [ "docker.service" "docker.socket" ]
# if imageFile or imageStream is not set, the service needs the network to download the image from the registry
++ lib.optionals (container.imageFile == null && container.imageStream == null) [ "network-online.target" ]
++ dependsOn;
requires = dependsOn;
environment = proxy_env;
path =
if cfg.backend == "docker" then [ config.virtualisation.docker.package ]
else if cfg.backend == "podman" then [ config.virtualisation.podman.package ]
else throw "Unhandled backend: ${cfg.backend}";
script = concatStringsSep " \\\n " ([
"exec ${cfg.backend} "
] ++ map escapeShellArg container.preRunExtraOptions ++ [
"run"
"--rm"
"--name=${escapedName}"
"--log-driver=${container.log-driver}"
] ++ optional (container.entrypoint != null)
"--entrypoint=${escapeShellArg container.entrypoint}"
++ optional (container.hostname != null)
"--hostname=${escapeShellArg container.hostname}"
++ lib.optionals (cfg.backend == "podman") [
"--cidfile=/run/podman-${escapedName}.ctr-id"
"--cgroups=no-conmon"
"--sdnotify=conmon"
"-d"
"--replace"
] ++ (mapAttrsToList (k: v: "-e ${escapeShellArg k}=${escapeShellArg v}") container.environment)
++ map (f: "--env-file ${escapeShellArg f}") container.environmentFiles
++ map (p: "-p ${escapeShellArg p}") container.ports
++ optional (container.user != null) "-u ${escapeShellArg container.user}"
++ map (v: "-v ${escapeShellArg v}") container.volumes
++ (mapAttrsToList (k: v: "-l ${escapeShellArg k}=${escapeShellArg v}") container.labels)
++ optional (container.workdir != null) "-w ${escapeShellArg container.workdir}"
++ map escapeShellArg container.extraOptions
++ [container.image]
++ map escapeShellArg container.cmd
);
preStop = if cfg.backend == "podman"
then "podman stop --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
else "${cfg.backend} stop ${name} || true";
postStop = if cfg.backend == "podman"
then "podman rm -f --ignore --cidfile=/run/podman-${escapedName}.ctr-id"
else "${cfg.backend} rm -f ${name} || true";
serviceConfig = {
### There is no generalized way of supporting `reload` for docker
### containers. Some containers may respond well to SIGHUP sent to their
### init process, but it is not guaranteed; some apps have other reload
### mechanisms, some don't have a reload signal at all, and some docker
### images just have broken signal handling. The best compromise in this
### case is probably to leave ExecReload undefined, so `systemctl reload`
### will at least result in an error instead of potentially undefined
### behaviour.
###
### Advanced users can still override this part of the unit to implement
### a custom reload handler, since the result of all this is a normal
### systemd service from the perspective of the NixOS module system.
###
# ExecReload = ...;
###
ExecStartPre = [ "${preStartScript}/bin/pre-start" ];
TimeoutStartSec = 0;
TimeoutStopSec = 120;
Restart = "always";
} // optionalAttrs (cfg.backend == "podman") {
Environment="PODMAN_SYSTEMD_UNIT=podman-${name}.service";
Type="notify";
NotifyAccess="all";
};
};
in {
in
{
imports = [
(
lib.mkChangedOptionModule
[ "docker-containers" ]
[ "virtualisation" "oci-containers" ]
(oldcfg: {
backend = "docker";
containers = lib.mapAttrs (n: v: builtins.removeAttrs (v // {
extraOptions = v.extraDockerOptions or [];
}) [ "extraDockerOptions" ]) oldcfg.docker-containers;
})
)
(lib.mkChangedOptionModule [ "docker-containers" ] [ "virtualisation" "oci-containers" ] (oldcfg: {
backend = "docker";
containers = lib.mapAttrs (
n: v:
builtins.removeAttrs (
v
// {
extraOptions = v.extraDockerOptions or [ ];
}
) [ "extraDockerOptions" ]
) oldcfg.docker-containers;
}))
];
options.virtualisation.oci-containers = {
backend = mkOption {
type = types.enum [ "podman" "docker" ];
type = types.enum [
"podman"
"docker"
];
default = if versionAtLeast config.system.stateVersion "22.05" then "podman" else "docker";
description = "The underlying Docker implementation to use.";
};
containers = mkOption {
default = {};
default = { };
type = types.attrsOf (types.submodule containerOptions);
description = "OCI (Docker) containers to run as systemd services.";
};
};
config = lib.mkIf (cfg.containers != {}) (lib.mkMerge [
{
systemd.services = mapAttrs' (n: v: nameValuePair "${cfg.backend}-${n}" (mkService n v)) cfg.containers;
config = lib.mkIf (cfg.containers != { }) (
lib.mkMerge [
{
systemd.services = mapAttrs' (
n: v: nameValuePair "${cfg.backend}-${n}" (mkService n v)
) cfg.containers;
assertions =
let
toAssertion = _: { imageFile, imageStream, ... }:
{ assertion = imageFile == null || imageStream == null;
assertions =
let
toAssertion =
_:
{ imageFile, imageStream, ... }:
{
assertion = imageFile == null || imageStream == null;
message = "You can only define one of imageFile and imageStream";
};
message = "You can only define one of imageFile and imageStream";
};
in
in
lib.mapAttrsToList toAssertion cfg.containers;
}
(lib.mkIf (cfg.backend == "podman") {
virtualisation.podman.enable = true;
})
(lib.mkIf (cfg.backend == "docker") {
virtualisation.docker.enable = true;
})
]);
}
(lib.mkIf (cfg.backend == "podman") {
virtualisation.podman.enable = true;
})
(lib.mkIf (cfg.backend == "docker") {
virtualisation.docker.enable = true;
})
]
);
}

View File

@ -22,6 +22,16 @@ let
image = "nginx-container";
imageStream = pkgs.dockerTools.examples.nginxStream;
ports = ["8181:80"];
capAdd = {
CAP_AUDIT_READ = true;
};
capDrop = {
CAP_AUDIT_WRITE = true;
};
privileged = false;
devices = [
"/dev/random:/dev/random"
];
};
};
@ -32,11 +42,18 @@ let
};
# Python test driver. `${backend}` is interpolated both as the name of the test
# machine object and as the container CLI that is invoked on it.
# Flow: wait for the nginx unit and port 8181, check the HTTP response, capture
# the container's inspect JSON, stop the service (10 s timeout), and only then
# assert on the captured HostConfig (capabilities, privileged flag, devices).
testScript = ''
import json
start_all()
${backend}.wait_for_unit("${backend}-nginx.service")
${backend}.wait_for_open_port(8181)
${backend}.wait_until_succeeds("curl -f http://localhost:8181 | grep Hello")
output = json.loads(${backend}.succeed("${backend} inspect nginx --format json").strip())[0]
${backend}.succeed("systemctl stop ${backend}-nginx.service", timeout=10)
assert output['HostConfig']['CapAdd'] == ["CAP_AUDIT_READ"]
assert output['HostConfig']['CapDrop'] == ${if backend == "docker" then "[\"CAP_AUDIT_WRITE\"]" else "[]"} # Rootless podman runs with no capabilities so it cannot drop them
assert output['HostConfig']['Privileged'] == False
assert output['HostConfig']['Devices'] == [{'PathOnHost': '/dev/random', 'PathInContainer': '/dev/random', 'CgroupPermissions': '${if backend == "docker" then "rwm" else ""}'}]
'';
};