Merge pull request #328385 from rorosen/k3s-kubelet-config

nixos/k3s: add options for graceful node shutdown and kubelet config
This commit is contained in:
❄️ 2024-07-25 10:19:16 -03:00 committed by GitHub
commit 5f2a6de6f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 182 additions and 36 deletions

View File

@ -359,6 +359,53 @@ in
by the k3s agent. This option only makes sense on nodes with an enabled agent.
'';
};
gracefulNodeShutdown = {
  # Declared with mkOption instead of mkEnableOption: mkEnableOption builds the
  # description as "Whether to enable <text>.", which leaves a dangling "." after
  # a multi-sentence text that already ends with its own period and a newline.
  # Type, default and example are the same ones mkEnableOption would set.
  enable = lib.mkOption {
    type = lib.types.bool;
    default = false;
    example = true;
    description = ''
      Whether to enable graceful node shutdowns where the kubelet attempts to detect
      node system shutdown and terminates pods running on the node. See the
      [documentation](https://kubernetes.io/docs/concepts/cluster-administration/node-shutdown/#graceful-node-shutdown)
      for further information.
    '';
  };

  # Passed through verbatim as the kubelet's `shutdownGracePeriod` setting when
  # `enable` is set; the value is a duration string such as "30s" or "1m30s".
  shutdownGracePeriod = lib.mkOption {
    type = lib.types.nonEmptyStr;
    default = "30s";
    example = "1m30s";
    description = ''
      Specifies the total duration that the node should delay the shutdown by. This is the total
      grace period for pod termination for both regular and critical pods.
    '';
  };

  # Passed through verbatim as the kubelet's `shutdownGracePeriodCriticalPods`
  # setting when `enable` is set.
  shutdownGracePeriodCriticalPods = lib.mkOption {
    type = lib.types.nonEmptyStr;
    default = "10s";
    example = "15s";
    description = ''
      Specifies the duration used to terminate critical pods during a node shutdown. This should be
      less than `shutdownGracePeriod`.
    '';
  };
};
# Free-form kubelet settings: any attribute name is accepted and the values are
# not restricted to a particular type.
extraKubeletConfig = lib.mkOption {
  type = lib.types.attrsOf lib.types.anything;
  description = ''
    Extra configuration to add to the kubelet's configuration file. The subset of the kubelet's
    configuration that can be configured via a file is defined by the
    [KubeletConfiguration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/)
    struct. See the
    [documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/)
    for further information.
  '';
  example = {
    containerLogMaxSize = "5Mi";
    memoryThrottlingFactor = 0.69;
    podsPerCore = 3;
  };
  default = { };
};
};
# implementation
@ -397,43 +444,59 @@ in
environment.systemPackages = [ config.services.k3s.package ];
systemd.services.k3s = {
description = "k3s service";
after = [
"firewall.service"
"network-online.target"
];
wants = [
"firewall.service"
"network-online.target"
];
wantedBy = [ "multi-user.target" ];
path = optional config.boot.zfs.enabled config.boot.zfs.package;
serviceConfig = {
# See: https://github.com/rancher/k3s/blob/dddbd16305284ae4bd14c0aade892412310d7edc/install.sh#L197
Type = if cfg.role == "agent" then "exec" else "notify";
KillMode = "process";
Delegate = "yes";
Restart = "always";
RestartSec = "5s";
LimitNOFILE = 1048576;
LimitNPROC = "infinity";
LimitCORE = "infinity";
TasksMax = "infinity";
EnvironmentFile = cfg.environmentFile;
ExecStartPre = activateK3sContent;
ExecStart = concatStringsSep " \\\n " (
[ "${cfg.package}/bin/k3s ${cfg.role}" ]
++ (optional cfg.clusterInit "--cluster-init")
++ (optional cfg.disableAgent "--disable-agent")
++ (optional (cfg.serverAddr != "") "--server ${cfg.serverAddr}")
++ (optional (cfg.token != "") "--token ${cfg.token}")
++ (optional (cfg.tokenFile != null) "--token-file ${cfg.tokenFile}")
++ (optional (cfg.configPath != null) "--config ${cfg.configPath}")
++ (lib.flatten cfg.extraFlags)
systemd.services.k3s =
  let
    # Graceful-shutdown durations, present only when the feature is enabled.
    gracefulShutdownSettings = lib.optionalAttrs cfg.gracefulNodeShutdown.enable {
      shutdownGracePeriod = cfg.gracefulNodeShutdown.shutdownGracePeriod;
      shutdownGracePeriodCriticalPods = cfg.gracefulNodeShutdown.shutdownGracePeriodCriticalPods;
    };

    # extraKubeletConfig is merged last, so it wins on conflicting keys.
    kubeletSettings = gracefulShutdownSettings // cfg.extraKubeletConfig;

    # YAML file handed to the kubelet; the fixed header identifies the document
    # as a KubeletConfiguration.
    yamlFormat = pkgs.formats.yaml { };
    kubeletConfigFile = yamlFormat.generate "k3s-kubelet-config" (
      {
        apiVersion = "kubelet.config.k8s.io/v1beta1";
        kind = "KubeletConfiguration";
      }
      // kubeletSettings
    );

    # Units the service both orders itself after and pulls in.
    startupDependencies = [
      "firewall.service"
      "network-online.target"
    ];

    # Command-line words for ExecStart, in the order they are passed to k3s.
    # The kubelet config flag is only added when there is something to configure.
    k3sCommandLine =
      [ "${cfg.package}/bin/k3s ${cfg.role}" ]
      ++ optional cfg.clusterInit "--cluster-init"
      ++ optional cfg.disableAgent "--disable-agent"
      ++ optional (cfg.serverAddr != "") "--server ${cfg.serverAddr}"
      ++ optional (cfg.token != "") "--token ${cfg.token}"
      ++ optional (cfg.tokenFile != null) "--token-file ${cfg.tokenFile}"
      ++ optional (cfg.configPath != null) "--config ${cfg.configPath}"
      ++ optional (kubeletSettings != { }) "--kubelet-arg=config=${kubeletConfigFile}"
      ++ lib.flatten cfg.extraFlags;
  in
  {
    description = "k3s service";
    after = startupDependencies;
    wants = startupDependencies;
    wantedBy = [ "multi-user.target" ];
    path = optional config.boot.zfs.enabled config.boot.zfs.package;
    serviceConfig = {
      # See: https://github.com/rancher/k3s/blob/dddbd16305284ae4bd14c0aade892412310d7edc/install.sh#L197
      Type = if cfg.role != "agent" then "notify" else "exec";
      KillMode = "process";
      Delegate = "yes";
      Restart = "always";
      RestartSec = "5s";
      LimitNOFILE = 1048576;
      LimitNPROC = "infinity";
      LimitCORE = "infinity";
      TasksMax = "infinity";
      EnvironmentFile = cfg.environmentFile;
      ExecStartPre = activateK3sContent;
      ExecStart = concatStringsSep " \\\n " k3sCommandLine;
    };
  };
};
};
meta.maintainers = lib.teams.k3s.members;

View File

@ -15,6 +15,9 @@ in
inherit (pkgs) etcd;
}
) allK3s;
single-node = lib.mapAttrs (_: k3s: import ./single-node.nix { inherit system pkgs k3s; }) allK3s;
# Instantiates the ./kubelet-config.nix test once per entry of allK3s, passing
# that entry as the `k3s` argument.
kubelet-config = lib.mapAttrs (
_: k3s: import ./kubelet-config.nix { inherit system pkgs k3s; }
) allK3s;
multi-node = lib.mapAttrs (_: k3s: import ./multi-node.nix { inherit system pkgs k3s; }) allK3s;
single-node = lib.mapAttrs (_: k3s: import ./single-node.nix { inherit system pkgs k3s; }) allK3s;
}

View File

@ -0,0 +1,80 @@
# A test that sets extra kubelet configuration and enables graceful node shutdown
import ../make-test-python.nix (
  {
    pkgs,
    lib,
    k3s,
    ...
  }:
  let
    # Values configured on the node below and compared against the kubelet's
    # reported configuration in the test script.
    nodeName = "test";
    shutdownGracePeriod = "1m13s";
    shutdownGracePeriodCriticalPods = "13s";
    podsPerCore = 3;
    memoryThrottlingFactor = 0.69;
    containerLogMaxSize = "5Mi";
  in
  {
    name = "${k3s.name}-kubelet-config";

    nodes.machine =
      { pkgs, ... }:
      {
        environment.systemPackages = [ pkgs.jq ];

        # k3s uses enough resources the default vm fails.
        virtualisation.memorySize = 1536;
        virtualisation.diskSize = 4096;

        services.k3s = {
          enable = true;
          package = k3s;
          # Slightly reduce resource usage
          extraFlags = [
            "--disable coredns"
            "--disable local-storage"
            "--disable metrics-server"
            "--disable servicelb"
            "--disable traefik"
            "--node-name ${nodeName}"
          ];
          gracefulNodeShutdown = {
            enable = true;
            inherit shutdownGracePeriod shutdownGracePeriodCriticalPods;
          };
          extraKubeletConfig = {
            inherit podsPerCore memoryThrottlingFactor containerLogMaxSize;
          };
        };
      };

    # NOTE: subscripts inside the f-strings use single quotes on purpose. Reusing
    # the enclosing double quote inside an f-string expression is only accepted
    # by Python >= 3.12 (PEP 701) and is a SyntaxError on older interpreters.
    testScript = ''
      import json

      start_all()
      machine.wait_for_unit("k3s")
      # wait until the node is ready
      machine.wait_until_succeeds(r"""kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' nodes/${nodeName}""")
      # test whether the kubelet registered an inhibitor lock
      machine.succeed("systemd-inhibit --list --no-legend | grep \"kubelet.*k3s-server.*shutdown\"")
      # run kubectl proxy in the background, close stdout through redirection to not wait for the command to finish
      machine.execute("kubectl proxy --address 127.0.0.1 --port=8001 >&2 &")
      machine.wait_until_succeeds("nc -z 127.0.0.1 8001")
      # get the kubeletconfig
      kubelet_config=json.loads(machine.succeed("curl http://127.0.0.1:8001/api/v1/nodes/${nodeName}/proxy/configz | jq '.kubeletconfig'"))

      with subtest("Kubelet config values are set correctly"):
        assert kubelet_config["shutdownGracePeriod"] == "${shutdownGracePeriod}", \
          f"unexpected value for shutdownGracePeriod: {kubelet_config['shutdownGracePeriod']}"
        assert kubelet_config["shutdownGracePeriodCriticalPods"] == "${shutdownGracePeriodCriticalPods}", \
          f"unexpected value for shutdownGracePeriodCriticalPods: {kubelet_config['shutdownGracePeriodCriticalPods']}"
        assert kubelet_config["podsPerCore"] == ${toString podsPerCore}, \
          f"unexpected value for podsPerCore: {kubelet_config['podsPerCore']}"
        assert kubelet_config["memoryThrottlingFactor"] == ${toString memoryThrottlingFactor}, \
          f"unexpected value for memoryThrottlingFactor: {kubelet_config['memoryThrottlingFactor']}"
        assert kubelet_config["containerLogMaxSize"] == "${containerLogMaxSize}", \
          f"unexpected value for containerLogMaxSize: {kubelet_config['containerLogMaxSize']}"
    '';

    meta.maintainers = lib.teams.k3s.members;
  }
)