nixpkgs/nixos/lib/systemd-lib.nix

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

446 lines
15 KiB
Nix
Raw Normal View History

2015-04-20 09:31:17 +00:00
{ config, lib, pkgs }:
with lib;
let
cfg = config.systemd;
2021-10-21 01:55:36 +00:00
lndir = "${pkgs.buildPackages.xorg.lndir}/bin/lndir";
systemd = cfg.package;
in rec {
2015-04-20 09:31:17 +00:00
shellEscape = s: (replaceChars [ "\\" ] [ "\\\\" ] s);
nixos: Add 'chroot' options to systemd.services Currently, if you want to properly chroot a systemd service, you could do it using BindReadOnlyPaths=/nix/store (which is not what I'd call "properly", because the whole store is still accessible) or use a separate derivation that gathers the runtime closure of the service you want to chroot. The former is the easier method and there is also a method directly offered by systemd, called ProtectSystem, which still leaves the whole store accessible. The latter however is a bit more involved, because you need to bind-mount each store path of the runtime closure of the service you want to chroot. This can be achieved using pkgs.closureInfo and a small derivation that packs everything into a systemd unit, which later can be added to systemd.packages. That's also what I did several times[1][2] in the past. However, this process got a bit tedious, so I decided that it would be generally useful for NixOS, so this very implementation was born. Now if you want to chroot a systemd service, all you need to do is: { systemd.services.yourservice = { description = "My Shiny Service"; wantedBy = [ "multi-user.target" ]; chroot.enable = true; serviceConfig.ExecStart = "${pkgs.myservice}/bin/myservice"; }; } If more than the dependencies for the ExecStart* and ExecStop* (which btw. also includes "script" and {pre,post}Start) need to be in the chroot, it can be specified using the chroot.packages option. By default (which uses the "full-apivfs"[3] confinement mode), a user namespace is set up as well and /proc, /sys and /dev are mounted appropriately. In addition - and by default - a /bin/sh executable is provided as well, which is useful for most programs that use the system() C library call to execute commands via shell. The shell providing /bin/sh is dash instead of the default in NixOS (which is bash), because it's way more lightweight and after all we're chrooting because we want to lower the attack surface and it should be only used for "/bin/sh -c something". Prior to submitting this here, I did a first implementation of this outside[4] of nixpkgs, which duplicated the "pathSafeName" functionality from systemd-lib.nix, just because it's only a single line. However, I decided to just re-use the one from systemd here and subsequently made it available when importing systemd-lib.nix, so that the systemd-chroot implementation also benefits from fixes to that functionality (which is now a proper function). Unfortunately, we do have a few limitations as well. The first being that DynamicUser doesn't work in conjunction with tmpfs, because it already sets up a tmpfs in a different path and simply ignores the one we define. We could probably solve this by detecting it and try to bind-mount our paths to that different path whenever DynamicUser is enabled. The second limitation/issue is that RootDirectoryStartOnly doesn't work right now, because it only affects the RootDirectory option and not the individual bind mounts or our tmpfs. It would be helpful if systemd would have a way to disable specific bind mounts as well or at least have some way to ignore failures for the bind mounts/tmpfs setup. Another quirk we do have right now is that systemd tries to create a /usr directory within the chroot, which subsequently fails. Fortunately, this is just an ugly error and not a hard failure. [1]: https://github.com/headcounter/shabitica/blob/3bb01728a0237ad5e7/default.nix#L43-L62 [2]: https://github.com/aszlig/avonc/blob/dedf29e092481a33dc/nextcloud.nix#L103-L124 [3]: The reason this is called "full-apivfs" instead of just "full" is to make room for a *real* "full" confinement mode, which is more restrictive even. [4]: https://github.com/aszlig/avonc/blob/92a20bece4df54625e/systemd-chroot.nix Signed-off-by: aszlig <aszlig@nix.build>
2019-03-10 11:21:55 +00:00
mkPathSafeName = lib.replaceChars ["@" ":" "\\" "[" "]"] ["-" "-" "-" "" ""];
# a type for options that take a unit name
unitNameType = types.strMatching "[a-zA-Z0-9@%:_.\\-]+[.](service|socket|device|mount|automount|swap|target|path|timer|scope|slice)";
2015-04-20 09:31:17 +00:00
makeUnit = name: unit:
if unit.enable then
nixos: Add 'chroot' options to systemd.services Currently, if you want to properly chroot a systemd service, you could do it using BindReadOnlyPaths=/nix/store (which is not what I'd call "properly", because the whole store is still accessible) or use a separate derivation that gathers the runtime closure of the service you want to chroot. The former is the easier method and there is also a method directly offered by systemd, called ProtectSystem, which still leaves the whole store accessible. The latter however is a bit more involved, because you need to bind-mount each store path of the runtime closure of the service you want to chroot. This can be achieved using pkgs.closureInfo and a small derivation that packs everything into a systemd unit, which later can be added to systemd.packages. That's also what I did several times[1][2] in the past. However, this process got a bit tedious, so I decided that it would be generally useful for NixOS, so this very implementation was born. Now if you want to chroot a systemd service, all you need to do is: { systemd.services.yourservice = { description = "My Shiny Service"; wantedBy = [ "multi-user.target" ]; chroot.enable = true; serviceConfig.ExecStart = "${pkgs.myservice}/bin/myservice"; }; } If more than the dependencies for the ExecStart* and ExecStop* (which btw. also includes "script" and {pre,post}Start) need to be in the chroot, it can be specified using the chroot.packages option. By default (which uses the "full-apivfs"[3] confinement mode), a user namespace is set up as well and /proc, /sys and /dev are mounted appropriately. In addition - and by default - a /bin/sh executable is provided as well, which is useful for most programs that use the system() C library call to execute commands via shell. The shell providing /bin/sh is dash instead of the default in NixOS (which is bash), because it's way more lightweight and after all we're chrooting because we want to lower the attack surface and it should be only used for "/bin/sh -c something". Prior to submitting this here, I did a first implementation of this outside[4] of nixpkgs, which duplicated the "pathSafeName" functionality from systemd-lib.nix, just because it's only a single line. However, I decided to just re-use the one from systemd here and subsequently made it available when importing systemd-lib.nix, so that the systemd-chroot implementation also benefits from fixes to that functionality (which is now a proper function). Unfortunately, we do have a few limitations as well. The first being that DynamicUser doesn't work in conjunction with tmpfs, because it already sets up a tmpfs in a different path and simply ignores the one we define. We could probably solve this by detecting it and try to bind-mount our paths to that different path whenever DynamicUser is enabled. The second limitation/issue is that RootDirectoryStartOnly doesn't work right now, because it only affects the RootDirectory option and not the individual bind mounts or our tmpfs. It would be helpful if systemd would have a way to disable specific bind mounts as well or at least have some way to ignore failures for the bind mounts/tmpfs setup. Another quirk we do have right now is that systemd tries to create a /usr directory within the chroot, which subsequently fails. Fortunately, this is just an ugly error and not a hard failure. [1]: https://github.com/headcounter/shabitica/blob/3bb01728a0237ad5e7/default.nix#L43-L62 [2]: https://github.com/aszlig/avonc/blob/dedf29e092481a33dc/nextcloud.nix#L103-L124 [3]: The reason this is called "full-apivfs" instead of just "full" is to make room for a *real* "full" confinement mode, which is more restrictive even. [4]: https://github.com/aszlig/avonc/blob/92a20bece4df54625e/systemd-chroot.nix Signed-off-by: aszlig <aszlig@nix.build>
2019-03-10 11:21:55 +00:00
pkgs.runCommand "unit-${mkPathSafeName name}"
{ preferLocalBuild = true;
allowSubstitutes = false;
inherit (unit) text;
}
2015-04-20 09:31:17 +00:00
''
mkdir -p $out
echo -n "$text" > $out/${shellEscape name}
''
else
nixos: Add 'chroot' options to systemd.services Currently, if you want to properly chroot a systemd service, you could do it using BindReadOnlyPaths=/nix/store (which is not what I'd call "properly", because the whole store is still accessible) or use a separate derivation that gathers the runtime closure of the service you want to chroot. The former is the easier method and there is also a method directly offered by systemd, called ProtectSystem, which still leaves the whole store accessible. The latter however is a bit more involved, because you need to bind-mount each store path of the runtime closure of the service you want to chroot. This can be achieved using pkgs.closureInfo and a small derivation that packs everything into a systemd unit, which later can be added to systemd.packages. That's also what I did several times[1][2] in the past. However, this process got a bit tedious, so I decided that it would be generally useful for NixOS, so this very implementation was born. Now if you want to chroot a systemd service, all you need to do is: { systemd.services.yourservice = { description = "My Shiny Service"; wantedBy = [ "multi-user.target" ]; chroot.enable = true; serviceConfig.ExecStart = "${pkgs.myservice}/bin/myservice"; }; } If more than the dependencies for the ExecStart* and ExecStop* (which btw. also includes "script" and {pre,post}Start) need to be in the chroot, it can be specified using the chroot.packages option. By default (which uses the "full-apivfs"[3] confinement mode), a user namespace is set up as well and /proc, /sys and /dev are mounted appropriately. In addition - and by default - a /bin/sh executable is provided as well, which is useful for most programs that use the system() C library call to execute commands via shell. The shell providing /bin/sh is dash instead of the default in NixOS (which is bash), because it's way more lightweight and after all we're chrooting because we want to lower the attack surface and it should be only used for "/bin/sh -c something". Prior to submitting this here, I did a first implementation of this outside[4] of nixpkgs, which duplicated the "pathSafeName" functionality from systemd-lib.nix, just because it's only a single line. However, I decided to just re-use the one from systemd here and subsequently made it available when importing systemd-lib.nix, so that the systemd-chroot implementation also benefits from fixes to that functionality (which is now a proper function). Unfortunately, we do have a few limitations as well. The first being that DynamicUser doesn't work in conjunction with tmpfs, because it already sets up a tmpfs in a different path and simply ignores the one we define. We could probably solve this by detecting it and try to bind-mount our paths to that different path whenever DynamicUser is enabled. The second limitation/issue is that RootDirectoryStartOnly doesn't work right now, because it only affects the RootDirectory option and not the individual bind mounts or our tmpfs. It would be helpful if systemd would have a way to disable specific bind mounts as well or at least have some way to ignore failures for the bind mounts/tmpfs setup. Another quirk we do have right now is that systemd tries to create a /usr directory within the chroot, which subsequently fails. Fortunately, this is just an ugly error and not a hard failure. [1]: https://github.com/headcounter/shabitica/blob/3bb01728a0237ad5e7/default.nix#L43-L62 [2]: https://github.com/aszlig/avonc/blob/dedf29e092481a33dc/nextcloud.nix#L103-L124 [3]: The reason this is called "full-apivfs" instead of just "full" is to make room for a *real* "full" confinement mode, which is more restrictive even. [4]: https://github.com/aszlig/avonc/blob/92a20bece4df54625e/systemd-chroot.nix Signed-off-by: aszlig <aszlig@nix.build>
2019-03-10 11:21:55 +00:00
pkgs.runCommand "unit-${mkPathSafeName name}-disabled"
{ preferLocalBuild = true;
allowSubstitutes = false;
}
2015-04-20 09:31:17 +00:00
''
mkdir -p $out
ln -s /dev/null $out/${shellEscape name}
'';
boolValues = [true false "yes" "no"];
digits = map toString (range 0 9);
isByteFormat = s:
let
l = reverseList (stringToCharacters s);
suffix = head l;
nums = tail l;
in elem suffix (["K" "M" "G" "T"] ++ digits)
&& all (num: elem num digits) nums;
assertByteFormat = name: group: attr:
optional (attr ? ${name} && ! isByteFormat attr.${name})
"Systemd ${group} field `${name}' must be in byte format [0-9]+[KMGT].";
hexChars = stringToCharacters "0123456789abcdefABCDEF";
isMacAddress = s: stringLength s == 17
&& flip all (splitString ":" s) (bytes:
all (byte: elem byte hexChars) (stringToCharacters bytes)
);
assertMacAddress = name: group: attr:
optional (attr ? ${name} && ! isMacAddress attr.${name})
"Systemd ${group} field `${name}' must be a valid mac address.";
isPort = i: i >= 0 && i <= 65535;
assertPort = name: group: attr:
optional (attr ? ${name} && ! isPort attr.${name})
"Error on the systemd ${group} field `${name}': ${attr.name} is not a valid port number.";
assertValueOneOf = name: values: group: attr:
optional (attr ? ${name} && !elem attr.${name} values)
"Systemd ${group} field `${name}' cannot have value `${toString attr.${name}}'.";
assertHasField = name: group: attr:
optional (!(attr ? ${name}))
"Systemd ${group} field `${name}' must exist.";
assertRange = name: min: max: group: attr:
optional (attr ? ${name} && !(min <= attr.${name} && max >= attr.${name}))
"Systemd ${group} field `${name}' is outside the range [${toString min},${toString max}]";
assertMinimum = name: min: group: attr:
optional (attr ? ${name} && attr.${name} < min)
"Systemd ${group} field `${name}' must be greater than or equal to ${toString min}";
assertOnlyFields = fields: group: attr:
let badFields = filter (name: ! elem name fields) (attrNames attr); in
optional (badFields != [ ])
"Systemd ${group} has extra fields [${concatStringsSep " " badFields}].";
assertInt = name: group: attr:
optional (attr ? ${name} && !isInt attr.${name})
"Systemd ${group} field `${name}' is not an integer";
checkUnitConfig = group: checks: attrs: let
# We're applied at the top-level type (attrsOf unitOption), so the actual
# unit options might contain attributes from mkOverride and mkIf that we need to
# convert into single values before checking them.
defs = mapAttrs (const (v:
if v._type or "" == "override" then v.content
else if v._type or "" == "if" then v.content
else v
)) attrs;
errors = concatMap (c: c group defs) checks;
in if errors == [] then true
else builtins.trace (concatStringsSep "\n" errors) false;
toOption = x:
if x == true then "true"
else if x == false then "false"
else toString x;
attrsToSection = as:
concatStrings (concatLists (mapAttrsToList (name: value:
map (x: ''
${name}=${toOption x}
'')
(if isList value then value else [value]))
as));
generateUnits = { allowCollisions ? true, type, units, upstreamUnits, upstreamWants, packages ? cfg.packages, package ? cfg.package }:
let
typeDir = ({
system = "system";
initrd = "system";
user = "user";
nspawn = "nspawn";
}).${type};
in pkgs.runCommand "${type}-units"
{ preferLocalBuild = true;
allowSubstitutes = false;
} ''
2015-04-20 09:31:17 +00:00
mkdir -p $out
# Copy the upstream systemd units we're interested in.
for i in ${toString upstreamUnits}; do
fn=${package}/example/systemd/${typeDir}/$i
2015-04-20 09:31:17 +00:00
if ! [ -e $fn ]; then echo "missing $fn"; false; fi
if [ -L $fn ]; then
target="$(readlink "$fn")"
if [ ''${target:0:3} = ../ ]; then
ln -s "$(readlink -f "$fn")" $out/
else
cp -pd $fn $out/
fi
else
ln -s $fn $out/
fi
done
# Copy .wants links, but only those that point to units that
# we're interested in.
for i in ${toString upstreamWants}; do
fn=${package}/example/systemd/${typeDir}/$i
2015-04-20 09:31:17 +00:00
if ! [ -e $fn ]; then echo "missing $fn"; false; fi
x=$out/$(basename $fn)
mkdir $x
for i in $fn/*; do
y=$x/$(basename $i)
cp -pd $i $y
if ! [ -e $y ]; then rm $y; fi
done
done
# Symlink all units provided listed in systemd.packages.
packages="${toString packages}"
# Filter duplicate directories
declare -A unique_packages
for k in $packages ; do unique_packages[$k]=1 ; done
for i in ''${!unique_packages[@]}; do
for fn in $i/etc/systemd/${typeDir}/* $i/lib/systemd/${typeDir}/*; do
2015-04-20 09:31:17 +00:00
if ! [[ "$fn" =~ .wants$ ]]; then
if [[ -d "$fn" ]]; then
targetDir="$out/$(basename "$fn")"
mkdir -p "$targetDir"
${lndir} "$fn" "$targetDir"
else
ln -s $fn $out/
fi
2015-04-20 09:31:17 +00:00
fi
done
done
# Symlink all units defined by systemd.units. If these are also
# provided by systemd or systemd.packages, then add them as
# <unit-name>.d/overrides.conf, which makes them extend the
# upstream unit.
for i in ${toString (mapAttrsToList (n: v: v.unit) units)}; do
fn=$(basename $i/*)
if [ -e $out/$fn ]; then
2015-04-20 09:31:17 +00:00
if [ "$(readlink -f $i/$fn)" = /dev/null ]; then
ln -sfn /dev/null $out/$fn
else
${if allowCollisions then ''
mkdir -p $out/$fn.d
ln -s $i/$fn $out/$fn.d/overrides.conf
'' else ''
echo "Found multiple derivations configuring $fn!"
exit 1
''}
2015-04-20 09:31:17 +00:00
fi
else
ln -fs $i/$fn $out/
fi
done
2017-02-01 21:51:16 +00:00
# Create service aliases from aliases option.
${concatStrings (mapAttrsToList (name: unit:
concatMapStrings (name2: ''
ln -sfn '${name}' $out/'${name2}'
'') unit.aliases) units)}
# Create .wants and .requires symlinks from the wantedBy and
2015-04-20 09:31:17 +00:00
# requiredBy options.
${concatStrings (mapAttrsToList (name: unit:
concatMapStrings (name2: ''
mkdir -p $out/'${name2}.wants'
ln -sfn '../${name}' $out/'${name2}.wants'/
'') unit.wantedBy) units)}
${concatStrings (mapAttrsToList (name: unit:
concatMapStrings (name2: ''
mkdir -p $out/'${name2}.requires'
ln -sfn '../${name}' $out/'${name2}.requires'/
'') unit.requiredBy) units)}
${optionalString (type == "system") ''
# Stupid misc. symlinks.
ln -s ${cfg.defaultUnit} $out/default.target
ln -s ${cfg.ctrlAltDelUnit} $out/ctrl-alt-del.target
2015-04-20 09:31:17 +00:00
ln -s rescue.target $out/kbrequest.target
mkdir -p $out/getty.target.wants/
ln -s ../autovt@tty1.service $out/getty.target.wants/
ln -s ../remote-fs.target $out/multi-user.target.wants/
2015-04-20 09:31:17 +00:00
''}
''; # */
makeJobScript = name: text:
let
scriptName = replaceChars [ "\\" "@" ] [ "-" "_" ] (shellEscape name);
out = (pkgs.writeShellScriptBin scriptName ''
set -e
${text}
'').overrideAttrs (_: {
# The derivation name is different from the script file name
# to keep the script file name short to avoid cluttering logs.
name = "unit-script-${scriptName}";
});
in "${out}/bin/${scriptName}";
unitConfig = { config, options, ... }: {
config = {
unitConfig =
optionalAttrs (config.requires != [])
{ Requires = toString config.requires; }
// optionalAttrs (config.wants != [])
{ Wants = toString config.wants; }
// optionalAttrs (config.after != [])
{ After = toString config.after; }
// optionalAttrs (config.before != [])
{ Before = toString config.before; }
// optionalAttrs (config.bindsTo != [])
{ BindsTo = toString config.bindsTo; }
// optionalAttrs (config.partOf != [])
{ PartOf = toString config.partOf; }
// optionalAttrs (config.conflicts != [])
{ Conflicts = toString config.conflicts; }
// optionalAttrs (config.requisite != [])
{ Requisite = toString config.requisite; }
// optionalAttrs (config.restartTriggers != [])
{ X-Restart-Triggers = toString config.restartTriggers; }
// optionalAttrs (config.reloadTriggers != [])
{ X-Reload-Triggers = toString config.reloadTriggers; }
// optionalAttrs (config.description != "") {
Description = config.description; }
// optionalAttrs (config.documentation != []) {
Documentation = toString config.documentation; }
// optionalAttrs (config.onFailure != []) {
OnFailure = toString config.onFailure; }
// optionalAttrs (options.startLimitIntervalSec.isDefined) {
StartLimitIntervalSec = toString config.startLimitIntervalSec;
} // optionalAttrs (options.startLimitBurst.isDefined) {
StartLimitBurst = toString config.startLimitBurst;
};
};
};
serviceConfig = { name, config, ... }: {
config = mkMerge
[ { # Default path for systemd services. Should be quite minimal.
path = mkAfter
[ pkgs.coreutils
pkgs.findutils
pkgs.gnugrep
pkgs.gnused
systemd
];
environment.PATH = "${makeBinPath config.path}:${makeSearchPathOutput "bin" "sbin" config.path}";
}
(mkIf (config.preStart != "")
{ serviceConfig.ExecStartPre =
[ (makeJobScript "${name}-pre-start" config.preStart) ];
})
(mkIf (config.script != "")
{ serviceConfig.ExecStart =
makeJobScript "${name}-start" config.script + " " + config.scriptArgs;
})
(mkIf (config.postStart != "")
{ serviceConfig.ExecStartPost =
[ (makeJobScript "${name}-post-start" config.postStart) ];
})
(mkIf (config.reload != "")
{ serviceConfig.ExecReload =
makeJobScript "${name}-reload" config.reload;
})
(mkIf (config.preStop != "")
{ serviceConfig.ExecStop =
makeJobScript "${name}-pre-stop" config.preStop;
})
(mkIf (config.postStop != "")
{ serviceConfig.ExecStopPost =
makeJobScript "${name}-post-stop" config.postStop;
})
];
};
mountConfig = { config, ... }: {
config = {
mountConfig =
{ What = config.what;
Where = config.where;
} // optionalAttrs (config.type != "") {
Type = config.type;
} // optionalAttrs (config.options != "") {
Options = config.options;
};
};
};
automountConfig = { config, ... }: {
config = {
automountConfig =
{ Where = config.where;
};
};
};
commonUnitText = def: ''
[Unit]
${attrsToSection def.unitConfig}
'';
targetToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text =
''
[Unit]
${attrsToSection def.unitConfig}
'';
};
serviceToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Service]
${let env = cfg.globalEnvironment // def.environment;
in concatMapStrings (n:
let s = optionalString (env.${n} != null)
"Environment=${builtins.toJSON "${n}=${env.${n}}"}\n";
# systemd max line length is now 1MiB
# https://github.com/systemd/systemd/commit/e6dde451a51dc5aaa7f4d98d39b8fe735f73d2af
in if stringLength s >= 1048576 then throw "The value of the environment variable ${n} in systemd service ${name}.service is too long." else s) (attrNames env)}
${if def.reloadIfChanged then ''
X-ReloadIfChanged=true
'' else if !def.restartIfChanged then ''
X-RestartIfChanged=false
'' else ""}
${optionalString (!def.stopIfChanged) "X-StopIfChanged=false"}
${attrsToSection def.serviceConfig}
'';
};
socketToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Socket]
${attrsToSection def.socketConfig}
${concatStringsSep "\n" (map (s: "ListenStream=${s}") def.listenStreams)}
${concatStringsSep "\n" (map (s: "ListenDatagram=${s}") def.listenDatagrams)}
'';
};
timerToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Timer]
${attrsToSection def.timerConfig}
'';
};
pathToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Path]
${attrsToSection def.pathConfig}
'';
};
mountToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Mount]
${attrsToSection def.mountConfig}
'';
};
automountToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Automount]
${attrsToSection def.automountConfig}
'';
};
sliceToUnit = name: def:
{ inherit (def) aliases wantedBy requiredBy enable;
text = commonUnitText def +
''
[Slice]
${attrsToSection def.sliceConfig}
'';
};
2015-04-20 09:31:17 +00:00
}