nixpkgs/nixos/tests/thanos.nix
Silvan Mosberger 4f0dadbf38 treewide: format all inactive Nix files
After final improvements to the official formatter implementation,
this commit now performs the first treewide reformat of Nix files using it.
This is part of the implementation of RFC 166.

Only "inactive" files are reformatted, meaning only files that
aren't being touched by any PR with activity in the past 2 months.
This is to avoid conflicts for PRs that might soon be merged.
Later we can do a full treewide reformat to get the rest,
which should not cause as many conflicts.

A CI check has already been running for some time to ensure that new and
already-formatted files are formatted, so the files being reformatted here
should also stay formatted.

This commit was automatically created and can be verified using

    nix-build a08b3a4d19.tar.gz \
      --argstr baseRev b32a094368
    result/bin/apply-formatting $NIXPKGS_PATH
2024-12-10 20:26:33 +01:00

308 lines
9.2 KiB
Nix

let
  # TCP ports shared between the node definitions and the test script.
  minioPort = 9000; # MinIO S3 endpoint on the `s3` machine
  queryPort = 9090; # Thanos query HTTP API (also the port probed on `prometheus`)
  pushgwPort = 9091; # Prometheus pushgateway
  frontPort = 9092; # Thanos query-frontend
  grpcPort = 19090; # gRPC address of the Thanos sidecar and store

  # Static credentials handed to MinIO and to every Thanos component that
  # talks to the bucket. Only meaningful inside the test VMs.
  s3.accessKey = "BKIKJAA5BMMU2RHO6IBB";
  s3.secretKey = "V7f1CwQqAcwo80UEIJEjc5gVQUSSx5ohQ9GSrr12";

  # Object-store definition inherited by the sidecar, store and compact
  # services; it points at the bucket served by the `s3` machine.
  objstore = {
    config = {
      type = "S3";
      config = {
        bucket = "thanos-bucket";
        region = "us-east-1";
        endpoint = "s3:${toString minioPort}";
        access_key = s3.accessKey;
        secret_key = s3.secretKey;
        # Plain HTTP: the endpoint above is reached without TLS.
        insecure = true;
        signature_version2 = false;
        put_user_metadata = { };
        http_config.idle_conn_timeout = "0s";
        http_config.insecure_skip_verify = false;
        trace.enable = false;
      };
    };
  };
in
import ./make-test-python.nix {
name = "prometheus";
nodes = {
prometheus =
  { pkgs, ... }:
  {
    # Machine running Prometheus, the pushgateway and the Thanos sidecar.
    virtualisation.diskSize = 2 * 1024;
    virtualisation.memorySize = 2048;
    # jq is used by the test script to pick the metric value out of the
    # query API response.
    environment.systemPackages = [ pkgs.jq ];
    # The `query` machine connects to the sidecar's gRPC port.
    networking.firewall.allowedTCPPorts = [ grpcPort ];
    services.prometheus = {
      enable = true;
      enableReload = true;
      scrapeConfigs = [
        {
          job_name = "prometheus";
          static_configs = [
            {
              targets = [ "127.0.0.1:${toString queryPort}" ];
              labels = {
                instance = "localhost";
              };
            }
          ];
        }
        {
          job_name = "pushgateway";
          scrape_interval = "1s";
          static_configs = [
            {
              targets = [ "127.0.0.1:${toString pushgwPort}" ];
            }
          ];
        }
      ];
      # The rule file is YAML, so the nesting below is significant:
      # `rules` belongs to the group item and `expr` to the rule item.
      rules = [
        ''
          groups:
            - name: test
              rules:
                - record: testrule
                  expr: count(up{job="prometheus"})
        ''
      ];
      globalConfig = {
        external_labels = {
          some_label = "required by thanos";
        };
      };
      extraFlags = [
        # Required by thanos
        "--storage.tsdb.min-block-duration=5s"
        "--storage.tsdb.max-block-duration=5s"
      ];
    };
    services.prometheus.pushgateway = {
      enable = true;
      web.listen-address = ":${toString pushgwPort}";
      persistMetrics = true;
      persistence.interval = "1s";
      # The test script checks for /var/lib/prometheus-pushgateway/metrics.
      stateDir = "prometheus-pushgateway";
    };
    services.thanos = {
      sidecar = {
        enable = true;
        grpc-address = "0.0.0.0:${toString grpcPort}";
        inherit objstore;
      };
      # TODO: Add some tests for these services:
      #rule = {
      #  enable = true;
      #  http-address = "0.0.0.0:19194";
      #  grpc-address = "0.0.0.0:19193";
      #  query.addresses = [
      #    "localhost:19191"
      #  ];
      #  labels = {
      #    just = "some";
      #    nice = "labels";
      #  };
      #};
      #
      #receive = {
      #  http-address = "0.0.0.0:19195";
      #  enable = true;
      #  labels = {
      #    just = "some";
      #    nice = "labels";
      #  };
      #};
    };
    # Adds a "specialisation" of the above config which allows us to
    # "switch" to it and see if the services.prometheus.enableReload
    # functionality actually reloads the prometheus service instead of
    # restarting it.
    # NOTE(review): the test script in this file never switches to this
    # specialisation - confirm whether it is still needed here.
    specialisation = {
      "prometheus-config-change" = {
        configuration = {
          environment.systemPackages = [ pkgs.yq ];
          # This configuration just adds a new prometheus job
          # to scrape the node_exporter metrics of the s3 machine.
          services.prometheus = {
            scrapeConfigs = [
              {
                job_name = "s3-node_exporter";
                static_configs = [
                  {
                    targets = [ "s3:9100" ];
                  }
                ];
              }
            ];
          };
        };
      };
    };
  };
query =
  { pkgs, ... }:
  {
    # Machine running the Thanos querier and its query-frontend.
    # jq is needed by the metric check the test script runs on this node.
    environment.systemPackages = with pkgs; [ jq ];

    services.thanos = {
      # The querier serves the HTTP API and reads from the sidecar on the
      # `prometheus` machine over gRPC.
      query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        endpoints = [ "prometheus:${toString grpcPort}" ];
      };

      # The frontend forwards to the querier running on this same machine.
      query-frontend = {
        enable = true;
        http-address = "0.0.0.0:${toString frontPort}";
        query-frontend.downstream-url = "http://127.0.0.1:${toString queryPort}";
      };
    };
  };
store =
  { pkgs, ... }:
  {
    # Machine running the Thanos store and compact services against the
    # shared bucket, plus a local querier that reads from the store.
    virtualisation = {
      diskSize = 2 * 1024;
      memorySize = 2048;
    };

    # The test script runs both `jq` and the `thanos` CLI on this machine.
    environment.systemPackages = [
      pkgs.jq
      pkgs.thanos
    ];

    services.thanos = {
      store = {
        enable = true;
        inherit objstore;
        http-address = "0.0.0.0:10902";
        grpc-address = "0.0.0.0:${toString grpcPort}";
        # Short interval, presumably so uploaded blocks show up quickly.
        sync-block-duration = "1s";
      };

      compact = {
        enable = true;
        inherit objstore;
        http-address = "0.0.0.0:10903";
        consistency-delay = "5s";
      };

      # Local querier that talks to the store's gRPC port on this host.
      query = {
        enable = true;
        http-address = "0.0.0.0:${toString queryPort}";
        endpoints = [ "localhost:${toString grpcPort}" ];
      };
    };
  };
s3 =
  { pkgs, ... }:
  {
    # Machine providing the S3-compatible bucket (MinIO) used by Thanos.

    # Minio requires at least 1GiB of free disk space to run.
    virtualisation.diskSize = 2 * 1024;

    # Let the other machines reach the S3 endpoint.
    networking.firewall.allowedTCPPorts = [ minioPort ];

    services.minio = {
      enable = true;
      accessKey = s3.accessKey;
      secretKey = s3.secretKey;
    };

    # Client package for the `mc` commands the test script runs here
    # (presumably the provider of `mc` - confirm).
    environment.systemPackages = with pkgs; [ minio-client ];

    # Node exporter; the "s3-node_exporter" scrape job defined on the
    # prometheus machine targets this host.
    services.prometheus.exporters.node = {
      enable = true;
      openFirewall = true;
    };
  };
};
testScript =
  { nodes, ... }:
  ''
    # Before starting the other machines we first make sure that our S3 service is online
    # and has a bucket added for thanos:
    s3.start()
    s3.wait_for_unit("minio.service")
    s3.wait_for_open_port(${toString minioPort})
    s3.succeed(
        "mc config host add minio "
        + "http://localhost:${toString minioPort} "
        + "${s3.accessKey} ${s3.secretKey} --api s3v4",
        "mc mb minio/thanos-bucket",
    )

    # Now that s3 has started we can start the other machines:
    for machine in prometheus, query, store:
        machine.start()

    # Check if prometheus responds to requests:
    prometheus.wait_for_unit("prometheus.service")
    prometheus.wait_for_open_port(${toString queryPort})
    prometheus.succeed("curl -sf http://127.0.0.1:${toString queryPort}/metrics")

    # Let's test if pushing a metric to the pushgateway succeeds.
    # A started unit does not guarantee a listening socket, so wait for the
    # port before pushing.
    prometheus.wait_for_unit("pushgateway.service")
    prometheus.wait_for_open_port(${toString pushgwPort})
    prometheus.succeed(
        "echo 'some_metric 3.14' | "
        + "curl -f --data-binary @- "
        + "http://127.0.0.1:${toString pushgwPort}/metrics/job/some_job"
    )

    # Now check whether that metric gets ingested by prometheus.
    # Since we'll check for the metric several times on different machines
    # we abstract the test using the following function:


    def wait_for_metric(machine):
        """Poll the query API on `machine` until some_metric reports "3.14"."""
        return machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:${toString queryPort}/api/v1/query?query=some_metric' | "
            + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
        )


    wait_for_metric(prometheus)

    # Let's test if the pushgateway persists metrics to the configured location.
    prometheus.wait_until_succeeds("test -e /var/lib/prometheus-pushgateway/metrics")

    # Test thanos
    prometheus.wait_for_unit("thanos-sidecar.service")

    # Test if the Thanos query service can correctly retrieve the metric that was sent above.
    query.wait_for_unit("thanos-query.service")
    wait_for_metric(query)

    # Test Thanos query frontend service.
    # Wait for the listener, and pass -f so that an HTTP error status from
    # the health endpoint fails the test instead of being ignored by curl.
    query.wait_for_unit("thanos-query-frontend.service")
    query.wait_for_open_port(${toString frontPort})
    query.succeed("curl -sSf http://localhost:${toString frontPort}/-/healthy")

    # Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the
    # Thanos storage service has correctly downloaded it from S3 and if the Thanos
    # query service running on the store machine can correctly retrieve the metric:
    store.wait_for_unit("thanos-store.service")
    wait_for_metric(store)

    store.wait_for_unit("thanos-compact.service")

    # Test if the Thanos bucket command is able to retrieve blocks from the S3 bucket
    # and check if the blocks have the correct labels:
    store.succeed(
        "thanos tools bucket ls "
        + "--objstore.config-file=${nodes.store.config.services.thanos.store.objstore.config-file} "
        + "--output=json | "
        + "jq .thanos.labels.some_label | "
        + "grep 'required by thanos'"
    )
  '';
}