nixpkgs/nixos/tests/prometheus/config-reload.nix

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

117 lines
3.9 KiB
Nix
Raw Normal View History

import ../make-test-python.nix ({ lib, pkgs, ... }:
{
name = "prometheus-config-reload";
nodes = {
prometheus = { config, pkgs, ... }: {
environment.systemPackages = [ pkgs.jq ];
networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
services.prometheus = {
enable = true;
enableReload = true;
globalConfig.scrape_interval = "2s";
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [
{
targets = [
"prometheus:${toString config.services.prometheus.port}"
];
}
];
}
];
};
specialisation = {
"prometheus-config-change" = {
configuration = {
environment.systemPackages = [ pkgs.yq ];
# This configuration just adds a new prometheus job
# to scrape the node_exporter metrics of the s3 machine.
services.prometheus = {
scrapeConfigs = [
{
job_name = "node";
static_configs = [
{
targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ];
}
];
}
];
};
};
};
};
};
};
testScript = ''
prometheus.wait_for_unit("prometheus")
prometheus.wait_for_open_port(9090)
# Check if switching to a NixOS configuration that changes the prometheus
# configuration reloads (instead of restarts) prometheus before the switch
# finishes successfully:
with subtest("config change reloads prometheus"):
import json
# We check if prometheus has finished reloading by looking for the message
# "Completed loading of configuration file" in the journal between the start
# and finish of switching to the new NixOS configuration.
#
# To mark the start we record the journal cursor before starting the switch:
cursor_before_switching = json.loads(
prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
)["__CURSOR"]
# Now we switch:
prometheus_config_change = prometheus.succeed(
"readlink /run/current-system/specialisation/prometheus-config-change"
).strip()
prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
# Next we retrieve all logs since the start of switching:
logs_after_starting_switching = prometheus.succeed(
"""
journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
""".format(
cursor_before_switching=cursor_before_switching
)
)
# Finally we check if the message "Completed loading of configuration file"
# occurs before the "finished switching to system configuration" message:
finished_switching_msg = (
"finished switching to system configuration " + prometheus_config_change
)
reloaded_before_switching_finished = False
finished_switching = False
for log_line in logs_after_starting_switching.split("\n"):
msg = json.loads(log_line)["MESSAGE"]
if "Completed loading of configuration file" in msg:
reloaded_before_switching_finished = True
if msg == finished_switching_msg:
finished_switching = True
break
assert reloaded_before_switching_finished
assert finished_switching
# Check if the reloaded config includes the new node job:
prometheus.succeed(
"""
curl -sf http://127.0.0.1:9090/api/v1/status/config \
| jq -r .data.yaml \
| yq '.scrape_configs | any(.job_name == "node")' \
| grep true
"""
)
'';
})