Merge pull request #18822 from wlhlm/containers-dev-tun

Allow access to /dev/net/tun inside containers
This commit is contained in:
Jörg Thalheim 2016-10-01 00:19:02 +02:00 committed by GitHub
commit a8c172ca4b

View File

@ -129,6 +129,9 @@ let
--setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
--setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
--setenv PATH="$PATH" \
${if cfg.additionalCapabilities != null then
''--capability="${concatStringsSep " " cfg.additionalCapabilities}"'' else ""
} \
${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
'';
@ -205,6 +208,41 @@ let
''
);
serviceDirectives = cfg: {
ExecReload = pkgs.writeScript "reload-container"
''
#! ${pkgs.stdenv.shell} -e
${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
'';
SyslogIdentifier = "container %i";
EnvironmentFile = "-/etc/containers/%i.conf";
Type = "notify";
# Note that on reboot, systemd-nspawn returns 133, so this
# unit will be restarted. On poweroff, it returns 0, so the
# unit won't be restarted.
RestartForceExitStatus = "133";
SuccessExitStatus = "133";
Restart = "on-failure";
# Hack: we don't want to kill systemd-nspawn, since we call
# "machinectl poweroff" in preStop to shut down the
# container cleanly. But systemd requires sending a signal
# (at least if we want remaining processes to be killed
# after the timeout). So send an ignored signal.
KillMode = "mixed";
KillSignal = "WINCH";
DevicePolicy = "closed";
DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
};
system = config.nixpkgs.system;
bindMountOpts = { name, config, ... }: {
@ -235,6 +273,27 @@ let
};
allowedDeviceOpts = { name, config, ... }: {
options = {
node = mkOption {
example = "/dev/net/tun";
type = types.str;
description = "Path to device node";
};
modifier = mkOption {
example = "rw";
type = types.str;
description = ''
Device node access modifier. Takes a combination
<literal>r</literal> (read), <literal>w</literal> (write), and
<literal>m</literal> (mknod). See the
<literal>systemd.resource-control(5)</literal> man page for more
information.'';
};
};
};
mkBindFlag = d:
let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
@ -302,6 +361,8 @@ let
dummyConfig =
{
extraVeths = {};
additionalCapabilities = [];
allowedDevices = [];
hostAddress = null;
hostAddress6 = null;
localAddress = null;
@ -368,6 +429,26 @@ in
'';
};
additionalCapabilities = mkOption {
type = types.listOf types.str;
default = [];
example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
description = ''
Grant additional capabilities to the container. See the
capabilities(7) and systemd-nspawn(1) man pages for more
information.
'';
};
enableTun = mkOption {
type = types.bool;
default = false;
description = ''
Allows the container to create and setup tunnel interfaces
by granting the <literal>NET_ADMIN</literal> capability and
enabling access to <literal>/dev/net/tun</literal>.
'';
};
privateNetwork = mkOption {
type = types.bool;
default = false;
@ -422,6 +503,16 @@ in
'';
};
allowedDevices = mkOption {
type = types.listOf types.optionSet;
options = [ allowedDeviceOpts ];
default = [];
example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
description = ''
A list of device nodes to which the containers has access to.
'';
};
} // networkOptions;
config = mkMerge
@ -488,56 +579,36 @@ in
restartIfChanged = false;
serviceConfig = {
ExecReload = pkgs.writeScript "reload-container"
''
#! ${pkgs.stdenv.shell} -e
${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
'';
SyslogIdentifier = "container %i";
EnvironmentFile = "-/etc/containers/%i.conf";
Type = "notify";
# Note that on reboot, systemd-nspawn returns 133, so this
# unit will be restarted. On poweroff, it returns 0, so the
# unit won't be restarted.
RestartForceExitStatus = "133";
SuccessExitStatus = "133";
Restart = "on-failure";
# Hack: we don't want to kill systemd-nspawn, since we call
# "machinectl poweroff" in preStop to shut down the
# container cleanly. But systemd requires sending a signal
# (at least if we want remaining processes to be killed
# after the timeout). So send an ignored signal.
KillMode = "mixed";
KillSignal = "WINCH";
DevicePolicy = "closed";
};
serviceConfig = serviceDirectives dummyConfig;
};
in {
systemd.services = listToAttrs (filter (x: x.value != null) (
# The generic container template used by imperative containers
[{ name = "container@"; value = unit; }]
# declarative containers
++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (
++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
config = cfg // (
if cfg.enableTun then
{
allowedDevices = cfg.allowedDevices
++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
additionalCapabilities = cfg.additionalCapabilities
++ [ "CAP_NET_ADMIN" ];
}
else {});
in
unit // {
preStart = preStartScript cfg;
script = startScript cfg;
postStart = postStartScript cfg;
preStart = preStartScript config;
script = startScript config;
postStart = postStartScript config;
serviceConfig = serviceDirectives config;
} // (
if cfg.autoStart then
if config.autoStart then
{
wantedBy = [ "multi-user.target" ];
wants = [ "network.target" ];
after = [ "network.target" ];
restartTriggers = [ cfg.path ];
restartTriggers = [ config.path ];
reloadIfChanged = true;
}
else {})