nixos/victoriametrics: harden systemd unit, add more options.

This commit is contained in:
Ryan Yin 2024-10-29 10:45:10 +08:00
parent be804f5a18
commit 89b4cb7299
2 changed files with 192 additions and 61 deletions

View File

@ -1,65 +1,188 @@
{ config, pkgs, lib, ... }:
let cfg = config.services.victoriametrics; in
{
options.services.victoriametrics = with lib; {
enable = mkEnableOption "VictoriaMetrics, a time series database, long-term remote storage for Prometheus";
config,
pkgs,
lib,
...
}:
with lib;
let
cfg = config.services.victoriametrics;
settingsFormat = pkgs.formats.yaml { };
# Command line shared by the service's ExecStart and the build-time dry run:
# binary path, storage path and listen address first, then the optional
# retention flag, then any user-supplied flags.
startCLIList = lib.concatLists [
  [
    "${cfg.package}/bin/victoria-metrics"
    "-storageDataPath=/var/lib/${cfg.stateDir}"
    "-httpListenAddr=${cfg.listenAddress}"
  ]
  # -retentionPeriod is only emitted when the option is set.
  (lib.optional (cfg.retentionPeriod != null) "-retentionPeriod=${cfg.retentionPeriod}")
  cfg.extraOptions
];
# Render cfg.prometheusConfig to a YAML file named "prometheusConfig.yaml" and pass it
# through checkedConfig; the result is what -promscrape.config points at.
prometheusConfigYml = checkedConfig (
settingsFormat.generate "prometheusConfig.yaml" cfg.prometheusConfig
);
# Wrap a config file in a derivation that exercises it at build time.
# $out is a symlink to `file`; the build script then runs the same command line as the
# service (startCLIList) with -promscrape.config=<file> and -dryRun appended, so a
# non-zero exit from that command fails the build.
# NOTE(review): the dry run also receives -storageDataPath and cfg.extraOptions; flags that
# reference paths unavailable in the build sandbox may break this check — confirm.
checkedConfig =
file:
pkgs.runCommand "checked-config" { nativeBuildInputs = [ cfg.package ]; } ''
ln -s ${file} $out
${lib.escapeShellArgs startCLIList} -promscrape.config=${file} -dryRun
'';
in
{
options.services.victoriametrics = {
# mkEnableOption renders its argument as "Whether to enable <text>.", so pass a noun
# phrase without a trailing period instead of a full sentence.
enable = mkEnableOption "VictoriaMetrics, a fast, cost-effective and scalable monitoring solution and time series database";
# Package that provides bin/victoria-metrics (see startCLIList); built from the pkgs attribute "victoriametrics".
package = mkPackageOption pkgs "victoriametrics" { };
listenAddress = mkOption {
default = ":8428";
type = types.str;
description = ''
The listen address for the http interface.
TCP address to listen for incoming http requests.
'';
};
retentionPeriod = mkOption {
type = types.int;
default = 1;
stateDir = mkOption {
type = types.str;
default = "victoriametrics";
description = ''
Retention period in months.
Directory below `/var/lib` to store VictoriaMetrics metrics data.
This directory will be created automatically using systemd's StateDirectory mechanism.
'';
};
# How long samples are kept; null omits the -retentionPeriod flag entirely.
# Integers are coerced to strings so that a bare number of months
# (e.g. `retentionPeriod = 12;`) is still accepted alongside suffixed
# durations such as "15d".
retentionPeriod = mkOption {
  type = types.nullOr (types.coercedTo types.int toString types.str);
  default = null;
  example = "15d";
  description = ''
    How long to retain samples in storage.
    The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
    The following optional suffixes are supported: s (second), h (hour), d (day), w (week), y (year).
    If suffix isn't set, then the duration is counted in months (default 1)
  '';
};
# Scrape configuration as a free-form attribute set (any value the YAML settings format
# accepts). It is rendered to YAML and handed to the binary via -promscrape.config.
# The default is an empty attribute set, not null.
prometheusConfig = lib.mkOption {
type = lib.types.submodule { freeformType = settingsFormat.type; };
default = { };
example = literalExpression ''
{
scrape_configs = [
{
job_name = "postgres-exporter";
metrics_path = "/metrics";
static_configs = [
{
targets = ["1.2.3.4:9187"];
labels.type = "database";
}
];
}
{
job_name = "node-exporter";
metrics_path = "/metrics";
static_configs = [
{
targets = ["1.2.3.4:9100"];
labels.type = "node";
}
{
targets = ["5.6.7.8:9100"];
labels.type = "node";
}
];
}
];
}
'';
description = ''
Config for prometheus style metrics.
See the docs: <https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format>
for more information.
'';
};
extraOptions = mkOption {
type = types.listOf types.str;
default = [];
default = [ ];
example = literalExpression ''
[
"-httpAuth.username=username"
"-httpAuth.password=file:///abs/path/to/file"
"-loggerLevel=WARN"
]
'';
description = ''
Extra options to pass to VictoriaMetrics. See the README:
<https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md>
or {command}`victoriametrics -help` for more
information.
Extra options to pass to VictoriaMetrics. See the docs:
<https://docs.victoriametrics.com/single-server-victoriametrics/#list-of-command-line-flags>
or {command}`victoriametrics -help` for more information.
'';
};
};
config = lib.mkIf cfg.enable {
systemd.services.victoriametrics = {
description = "VictoriaMetrics time series database";
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
startLimitBurst = 5;
serviceConfig = {
Restart = "on-failure";
RestartSec = 1;
StateDirectory = "victoriametrics";
# Start VictoriaMetrics with the shared command line. The scrape config flag is appended
# only when prometheusConfig has content: the option is a submodule defaulting to `{ }`,
# so it is never null and a `!= null` guard would always add the flag (and always build
# the dry-run check derivation).
ExecStart = lib.escapeShellArgs (
  startCLIList
  ++ lib.optionals (cfg.prometheusConfig != { }) [ "-promscrape.config=${prometheusConfigYml}" ]
);
DynamicUser = true;
ExecStart = ''
${cfg.package}/bin/victoria-metrics \
-storageDataPath=/var/lib/victoriametrics \
-httpListenAddr ${cfg.listenAddress} \
-retentionPeriod ${toString cfg.retentionPeriod} \
${lib.escapeShellArgs cfg.extraOptions}
'';
# victoriametrics 1.59 with ~7GB of data seems to eventually panic when merging files and then
# begins restart-looping forever. Set LimitNOFILE= to a large number to work around this issue.
#
# panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08":
# cannot open source part for merging: cannot open values file in stream mode:
# cannot open file "/var/lib/victoriametrics/data/small/2021_08/[...]/values.bin":
# open /var/lib/victoriametrics/data/small/2021_08/[...]/values.bin: too many open files
# Restart the unit on failure, waiting one second between attempts.
RestartSec = 1;
Restart = "on-failure";
# Runtime and state directories are created with mode 0700; the state directory
# name comes from cfg.stateDir, matching the -storageDataPath passed to the binary.
RuntimeDirectory = "victoriametrics";
RuntimeDirectoryMode = "0700";
StateDirectory = cfg.stateDir;
StateDirectoryMode = "0700";
# Increase the limit to avoid errors like 'too many open files' when merging small parts
LimitNOFILE = 1048576;
# Hardening
# Device access: strict policy with /dev/null as the only explicitly allowed device.
DeviceAllow = [ "/dev/null rw" ];
DevicePolicy = "strict";
LockPersonality = true;
MemoryDenyWriteExecute = true;
NoNewPrivileges = true;
PrivateDevices = true;
PrivateTmp = true;
PrivateUsers = true;
ProtectClock = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectHostname = true;
ProtectKernelLogs = true;
ProtectKernelModules = true;
ProtectKernelTunables = true;
ProtectProc = "invisible";
ProtectSystem = "full";
RemoveIPC = true;
# Sockets are limited to the IPv4, IPv6 and Unix-domain address families.
RestrictAddressFamilies = [
"AF_INET"
"AF_INET6"
"AF_UNIX"
];
RestrictNamespaces = true;
RestrictRealtime = true;
RestrictSUIDSGID = true;
SystemCallArchitectures = "native";
# System calls: allow the @system-service group and exclude @privileged
# (a leading "~" negates the group in systemd's filter syntax).
SystemCallFilter = [
"@system-service"
"~@privileged"
];
};
wantedBy = [ "multi-user.target" ];
postStart =
let
bindAddr = (lib.optionalString (lib.hasPrefix ":" cfg.listenAddress) "127.0.0.1") + cfg.listenAddress;
bindAddr =
(lib.optionalString (lib.hasPrefix ":" cfg.listenAddress) "127.0.0.1") + cfg.listenAddress;
in
lib.mkBefore ''
until ${lib.getBin pkgs.curl}/bin/curl -s -o /dev/null http://${bindAddr}/ping; do

View File

@ -1,33 +1,41 @@
# This test runs influxdb and checks if influxdb is up and running
# This test runs victoriametrics and checks if victoriametrics is able to write points and run simple query
import ./make-test-python.nix ({ pkgs, ...} : {
name = "victoriametrics";
meta = with pkgs.lib.maintainers; {
maintainers = [ yorickvp ];
};
nodes = {
one = { ... }: {
services.victoriametrics.enable = true;
import ./make-test-python.nix (
{ pkgs, ... }:
{
name = "victoriametrics";
meta = with pkgs.lib.maintainers; {
maintainers = [
yorickvp
ryan4yin
];
};
};
testScript = ''
start_all()
nodes = {
one =
{ ... }:
{
services.victoriametrics.enable = true;
};
};
one.wait_for_unit("victoriametrics.service")
testScript = ''
start_all()
# write some points and run simple query
out = one.succeed(
"curl -f -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'"
)
cmd = (
"""curl -f -s -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'"""
)
# data takes a while to appear
one.wait_until_succeeds(f"[[ $({cmd} | wc -l) -ne 0 ]]")
out = one.succeed(cmd)
assert '"values":[123]' in out
assert '"values":[1.23]' in out
'';
})
one.wait_for_unit("victoriametrics.service")
# write some points and run simple query
out = one.succeed(
"curl -f -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write'"
)
cmd = (
"""curl -f -s -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}'"""
)
# data takes a while to appear
one.wait_until_succeeds(f"[[ $({cmd} | wc -l) -ne 0 ]]")
out = one.succeed(cmd)
assert '"values":[123]' in out
assert '"values":[1.23]' in out
'';
}
)