Add support for lightweight NixOS containers

You can now say:

  systemd.containers.foo.config =
    { services.openssh.enable = true;
      services.openssh.ports = [ 2022 ];
      users.extraUsers.root.openssh.authorizedKeys.keys = [ "ssh-dss ..." ];
    };

which defines a NixOS instance with the given configuration running
inside a lightweight container.

You can also manage the configuration of the container independently
from the host:

  systemd.containers.foo.path = "/nix/var/nix/profiles/containers/foo";

where "path" is a NixOS system profile.  It can be created/updated by
doing:

  $ nix-env --set -p /nix/var/nix/profiles/containers/foo \
      -f '<nixos>' -A system -I nixos-config=foo.nix

The container configuration (foo.nix) should define

  boot.isContainer = true;

to optimise away the building of a kernel and initrd.  This is set
automatically when the container is defined via the "config" option.

On the host, a lightweight container appears as the service
"container-<name>.service".  The container is like a regular NixOS
(virtual) machine, except that it doesn't have its own kernel.  It has
its own root file system (by default /var/lib/containers/<name>), but
shares the Nix store of the host (as a read-only bind mount).  It also
has access to the network devices of the host.

Currently, if the configuration of the container changes, running
"nixos-rebuild switch" on the host will cause the container to be
rebooted.  In the future we may want to send some message to the
container so that it can activate the new container configuration
without rebooting.

Containers are not perfectly isolated yet.  In particular, the host's
/sys/fs/cgroup is mounted (writable!) in the guest.
This commit is contained in:
Eelco Dolstra 2013-11-27 16:54:20 +01:00
parent 57f145a7f8
commit 9ee30cd9b5
10 changed files with 172 additions and 27 deletions

View File

@ -8,6 +8,7 @@
, extraArgs ? {} , extraArgs ? {}
, modules , modules
, check ? true , check ? true
, prefix ? []
}: }:
let extraArgs_ = extraArgs; pkgs_ = pkgs; system_ = system; in let extraArgs_ = extraArgs; pkgs_ = pkgs; system_ = system; in
@ -17,6 +18,7 @@ rec {
# Merge the option definitions in all modules, forming the full # Merge the option definitions in all modules, forming the full
# system configuration. # system configuration.
inherit (pkgs.lib.evalModules { inherit (pkgs.lib.evalModules {
inherit prefix;
modules = modules ++ baseModules; modules = modules ++ baseModules;
args = extraArgs; args = extraArgs;
check = check && options.environment.checkConfigurationOptions.value; check = check && options.environment.checkConfigurationOptions.value;
@ -48,7 +50,7 @@ rec {
let let
system = if nixpkgsOptions.system != "" then nixpkgsOptions.system else system_; system = if nixpkgsOptions.system != "" then nixpkgsOptions.system else system_;
nixpkgsOptions = (import ./eval-config.nix { nixpkgsOptions = (import ./eval-config.nix {
inherit system extraArgs modules; inherit system extraArgs modules prefix;
# For efficiency, leave out most NixOS modules; they don't # For efficiency, leave out most NixOS modules; they don't
# define nixpkgs.config, so it's pointless to evaluate them. # define nixpkgs.config, so it's pointless to evaluate them.
baseModules = [ ../modules/misc/nixpkgs.nix ]; baseModules = [ ../modules/misc/nixpkgs.nix ];

View File

@ -247,11 +247,11 @@
./system/boot/kexec.nix ./system/boot/kexec.nix
./system/boot/loader/efi.nix ./system/boot/loader/efi.nix
./system/boot/loader/generations-dir/generations-dir.nix ./system/boot/loader/generations-dir/generations-dir.nix
./system/boot/loader/gummiboot/gummiboot.nix
./system/boot/loader/raspberrypi/raspberrypi.nix
./system/boot/loader/grub/grub.nix ./system/boot/loader/grub/grub.nix
./system/boot/loader/grub/memtest.nix ./system/boot/loader/grub/memtest.nix
./system/boot/loader/gummiboot/gummiboot.nix
./system/boot/loader/init-script/init-script.nix ./system/boot/loader/init-script/init-script.nix
./system/boot/loader/raspberrypi/raspberrypi.nix
./system/boot/luksroot.nix ./system/boot/luksroot.nix
./system/boot/modprobe.nix ./system/boot/modprobe.nix
./system/boot/shutdown.nix ./system/boot/shutdown.nix
@ -276,6 +276,7 @@
./tasks/scsi-link-power-management.nix ./tasks/scsi-link-power-management.nix
./tasks/swraid.nix ./tasks/swraid.nix
./testing/service-runner.nix ./testing/service-runner.nix
./virtualisation/containers.nix
./virtualisation/libvirtd.nix ./virtualisation/libvirtd.nix
#./virtualisation/nova.nix #./virtualisation/nova.nix
./virtualisation/virtualbox-guest.nix ./virtualisation/virtualbox-guest.nix

View File

@ -209,7 +209,7 @@ in
###### implementation ###### implementation
config = { config = mkIf (!config.boot.isContainer) {
services.udev.extraRules = nixosRules; services.udev.extraRules = nixosRules;
@ -231,9 +231,16 @@ in
boot.extraModprobeConfig = "options firmware_class path=${config.hardware.firmware}"; boot.extraModprobeConfig = "options firmware_class path=${config.hardware.firmware}";
system.activationScripts.clearHotplug = system.activationScripts.udevd =
'' ''
echo "" > /proc/sys/kernel/hotplug echo "" > /proc/sys/kernel/hotplug
# Regenerate the hardware database /var/lib/udev/hwdb.bin
# whenever systemd changes.
if [ ! -e /var/lib/udev/prev-systemd -o "$(readlink /var/lib/udev/prev-systemd)" != ${config.systemd.package} ]; then
echo "regenerating udev hardware database..."
${config.systemd.package}/bin/udevadm hwdb --update && ln -sfn ${config.systemd.package} /var/lib/udev/prev-systemd
fi
''; '';
}; };

View File

@ -34,16 +34,24 @@ let
in '' in ''
mkdir $out mkdir $out
if [ ! -f ${kernelPath} ]; then # Containers don't have their own kernel or initrd. They boot
echo "The bootloader cannot find the proper kernel image." # directly into stage 2.
echo "(Expecting ${kernelPath})" ${optionalString (!config.boot.isContainer) ''
false if [ ! -f ${kernelPath} ]; then
fi echo "The bootloader cannot find the proper kernel image."
echo "(Expecting ${kernelPath})"
false
fi
ln -s ${kernelPath} $out/kernel ln -s ${kernelPath} $out/kernel
ln -s ${config.system.modulesTree} $out/kernel-modules ln -s ${config.system.modulesTree} $out/kernel-modules
ln -s ${config.system.build.initialRamdisk}/initrd $out/initrd echo -n "$kernelParams" > $out/kernel-params
ln -s ${config.system.build.initialRamdisk}/initrd $out/initrd
ln -s ${config.hardware.firmware} $out/firmware
''}
echo "$activationScript" > $out/activate echo "$activationScript" > $out/activate
substituteInPlace $out/activate --subst-var out substituteInPlace $out/activate --subst-var out
@ -56,9 +64,7 @@ let
ln -s ${config.system.build.etc}/etc $out/etc ln -s ${config.system.build.etc}/etc $out/etc
ln -s ${config.system.path} $out/sw ln -s ${config.system.path} $out/sw
ln -s "$systemd" $out/systemd ln -s "$systemd" $out/systemd
ln -s ${config.hardware.firmware} $out/firmware
echo -n "$kernelParams" > $out/kernel-params
echo -n "$configurationName" > $out/configuration-name echo -n "$configurationName" > $out/configuration-name
echo -n "systemd ${toString config.systemd.package.interfaceVersion}" > $out/init-interface-version echo -n "systemd ${toString config.systemd.package.interfaceVersion}" > $out/init-interface-version
echo -n "$nixosVersion" > $out/nixos-version echo -n "$nixosVersion" > $out/nixos-version
@ -92,7 +98,6 @@ let
systemd = config.systemd.package; systemd = config.systemd.package;
inherit children; inherit children;
kernelParams = config.boot.kernelParams;
installBootLoader = installBootLoader =
config.system.build.installBootLoader config.system.build.installBootLoader
or "echo 'Warning: do not know how to make this configuration bootable; please enable a boot loader.' 1>&2; true"; or "echo 'Warning: do not know how to make this configuration bootable; please enable a boot loader.' 1>&2; true";

View File

@ -145,7 +145,7 @@ in
###### implementation ###### implementation
config = { config = mkIf (!config.boot.isContainer) {
system.build = { inherit kernel; }; system.build = { inherit kernel; };

View File

@ -44,7 +44,7 @@ in
boot.loader.grub = { boot.loader.grub = {
enable = mkOption { enable = mkOption {
default = true; default = !config.boot.isContainer;
type = types.bool; type = types.bool;
description = '' description = ''
Whether to enable the GNU GRUB boot loader. Whether to enable the GNU GRUB boot loader.

View File

@ -66,7 +66,7 @@ with pkgs.lib;
###### implementation ###### implementation
config = { config = mkIf (!config.boot.isContainer) {
environment.etc = singleton environment.etc = singleton
{ source = pkgs.writeText "modprobe.conf" { source = pkgs.writeText "modprobe.conf"

View File

@ -328,7 +328,7 @@ in
}; };
config = { config = mkIf (!config.boot.isContainer) {
assertions = singleton assertions = singleton
{ assertion = any (fs: fs.mountPoint == "/") (attrValues config.fileSystems); { assertion = any (fs: fs.mountPoint == "/") (attrValues config.fileSystems);

View File

@ -604,13 +604,6 @@ in
mkdir -p /var/log/journal mkdir -p /var/log/journal
chmod 0755 /var/log/journal chmod 0755 /var/log/journal
# Regenerate the hardware database /var/lib/udev/hwdb.bin
# whenever systemd changes.
if [ ! -e /var/lib/udev/prev-systemd -o "$(readlink /var/lib/udev/prev-systemd)" != ${systemd} ]; then
echo "regenerating udev hardware database..."
${systemd}/bin/udevadm hwdb --update && ln -sfn ${systemd} /var/lib/udev/prev-systemd
fi
# Make all journals readable to users in the wheel and adm # Make all journals readable to users in the wheel and adm
# groups, in addition to those in the systemd-journal group. # groups, in addition to those in the systemd-journal group.
# Users can always read their own journals. # Users can always read their own journals.

View File

@ -0,0 +1,137 @@
# NixOS module for lightweight containers started with systemd-nspawn.
#
# Declares two options:
#   * boot.isContainer   - whether the system being evaluated is itself a
#                          container guest;
#   * systemd.containers - the set of containers to run on this machine.
#
# Each attribute of systemd.containers is mapped to a systemd service
# named "container-<name>".
{ config, pkgs, ... }:

with pkgs.lib;

let

  # Options of a single container, i.e. of one attribute of
  # systemd.containers.  `name' is the attribute name of that entry.
  # Note that `config' and `options' here are those of the submodule and
  # shadow the arguments of the enclosing module.
  containerSubmodule = { config, options, name, ... }: {

    options = {

      root = mkOption {
        type = types.path;
        description = ''
          The root directory of the container.
        '';
      };

      config = mkOption {
        description = ''
          A specification of the desired configuration of this
          container, as a NixOS module.
        '';
      };

      path = mkOption {
        type = types.path;
        example = "/nix/var/nix/profiles/containers/webserver";
        description = ''
          As an alternative to specifying
          <option>config</option>, you can specify the path to
          the evaluated NixOS system configuration, typically a
          symlink to a system profile.
        '';
      };

    };

    config = mkMerge [

      # Default location of the container's root directory.
      { root = mkDefault "/var/lib/containers/${name}"; }

      # If an inline `config' was given, evaluate it as a NixOS system and
      # use the resulting top-level derivation as `path'.
      (mkIf options.config.isDefined {
        path =
          let
            # Settings added to every inline container configuration.
            containerDefaults = {
              boot.isContainer = true;
              security.initialRootPassword = "!";
              networking.hostName = mkDefault name;
            };

            evaluatedSystem = import ../../lib/eval-config.nix {
              modules = [ containerDefaults config.config ];
              prefix = [ "systemd" "containers" name ];
            };
          in
            evaluatedSystem.config.system.build.toplevel;
      })

    ];

  };

  # Build the (unit name, unit definition) pair for one container.
  # `config' here is the configuration of the enclosing module.
  containerService = name: container:
    nameValuePair "container-${name}" {
      description = "Container '${name}'";

      wantedBy = [ "multi-user.target" ];

      unitConfig.RequiresMountsFor = [ container.root ];

      # Create <root>/etc and an (empty) <root>/etc/os-release if they do
      # not exist yet.
      preStart = ''
        mkdir -p -m 0755 ${container.root}/etc
        if ! [ -e ${container.root}/etc/os-release ]; then
        touch ${container.root}/etc/os-release
        fi
      '';

      # Run the container's init under systemd-nspawn, with /nix
      # bind-mounted read-only.
      serviceConfig.ExecStart =
        "${config.systemd.package}/bin/systemd-nspawn -M ${name} -D ${container.root} --bind-ro=/nix ${container.path}/init";

      # Signal the PID(s) listed in the container's cgroup tasks file and
      # then poll, for at most 180 one-second rounds, until the process
      # is gone.
      preStop = ''
        pid="$(cat /sys/fs/cgroup/systemd/machine/${name}.nspawn/system/tasks 2> /dev/null)"
        if [ -n "$pid" ]; then
        # Send the RTMIN+3 signal, which causes the container
        # systemd to start halt.target.
        echo "killing container systemd, PID = $pid"
        kill -RTMIN+3 $pid
        # Wait for the container to exit. We can't let systemd
        # do this because it will send a signal to the entire
        # cgroup.
        for ((n = 0; n < 180; n++)); do
        if ! kill -0 $pid 2> /dev/null; then break; fi
        sleep 1
        done
        fi
      '';
    };

in

{

  ###### interface

  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    systemd.containers = mkOption {
      type = types.attrsOf (types.submodule containerSubmodule);
      default = {};
      example = literalExample ''
        { webserver =
        { root = "/containers/webserver";
        path = "/nix/var/nix/profiles/webserver";
        };
        database =
        { root = "/containers/database";
        config =
        { config, pkgs, ... }:
        { services.postgresql.enable = true;
        services.postgresql.package = pkgs.postgresql92;
        };
        };
        }
      '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers. Each container appears as a service
        <literal>container-<replaceable>name</replaceable></literal>
        on the host system, allowing it to be started and stopped via
        <command>systemctl</command> .
      '';
    };

  };


  ###### implementation

  config = {

    # One "container-<name>" service per declared container.
    systemd.services = mapAttrs' containerService config.systemd.containers;

  };

}