nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix
Andreas Rammhold bb33cdd44b
Merge pull request #98738 from mayflower/prometheus-retention
nixos/prometheus: add retentionTime
2020-09-25 17:11:27 +02:00

677 lines
20 KiB
Nix

{ config, pkgs, lib, ... }:
with lib;
let
cfg = config.services.prometheus;
workingDir = "/var/lib/" + cfg.stateDir;
# a wrapper that verifies that the configuration is valid
promtoolCheck = what: name: file:
if cfg.checkConfig then
pkgs.runCommandNoCCLocal
"${name}-${replaceStrings [" "] [""] what}-checked"
{ buildInputs = [ cfg.package ]; } ''
ln -s ${file} $out
promtool ${what} $out
'' else file;
# Pretty-print JSON to a file
writePrettyJSON = name: x:
pkgs.runCommandNoCCLocal name {} ''
echo '${builtins.toJSON x}' | ${pkgs.jq}/bin/jq . > $out
'';
generatedPrometheusYml = writePrettyJSON "prometheus.yml" promConfig;
# This becomes the main config file for Prometheus
promConfig = {
global = filterValidPrometheus cfg.globalConfig;
rule_files = map (promtoolCheck "check rules" "rules") (cfg.ruleFiles ++ [
(pkgs.writeText "prometheus.rules" (concatStringsSep "\n" cfg.rules))
]);
scrape_configs = filterValidPrometheus cfg.scrapeConfigs;
alerting = {
inherit (cfg) alertmanagers;
};
};
prometheusYml = let
yml = if cfg.configText != null then
pkgs.writeText "prometheus.yml" cfg.configText
else generatedPrometheusYml;
in promtoolCheck "check config" "prometheus.yml" yml;
cmdlineArgs = cfg.extraFlags ++ [
"--storage.tsdb.path=${workingDir}/data/"
"--config.file=${prometheusYml}"
"--web.listen-address=${cfg.listenAddress}:${builtins.toString cfg.port}"
"--alertmanager.notification-queue-capacity=${toString cfg.alertmanagerNotificationQueueCapacity}"
"--alertmanager.timeout=${toString cfg.alertmanagerTimeout}s"
] ++ optional (cfg.webExternalUrl != null) "--web.external-url=${cfg.webExternalUrl}"
++ optional (cfg.retentionTime != null) "--storage.tsdb.retention.time=${cfg.retentionTime}";
filterValidPrometheus = filterAttrsListRecursive (n: v: !(n == "_module" || v == null));
filterAttrsListRecursive = pred: x:
if isAttrs x then
listToAttrs (
concatMap (name:
let v = x.${name}; in
if pred name v then [
(nameValuePair name (filterAttrsListRecursive pred v))
] else []
) (attrNames x)
)
else if isList x then
map (filterAttrsListRecursive pred) x
else x;
mkDefOpt = type : defaultStr : description : mkOpt type (description + ''
Defaults to <literal>${defaultStr}</literal> in prometheus
when set to <literal>null</literal>.
'');
mkOpt = type : description : mkOption {
type = types.nullOr type;
default = null;
inherit description;
};
promTypes.globalConfig = types.submodule {
options = {
scrape_interval = mkDefOpt types.str "1m" ''
How frequently to scrape targets by default.
'';
scrape_timeout = mkDefOpt types.str "10s" ''
How long until a scrape request times out.
'';
evaluation_interval = mkDefOpt types.str "1m" ''
How frequently to evaluate rules by default.
'';
external_labels = mkOpt (types.attrsOf types.str) ''
The labels to add to any time series or alerts when
communicating with external systems (federation, remote
storage, Alertmanager).
'';
};
};
promTypes.scrape_config = types.submodule {
options = {
job_name = mkOption {
type = types.str;
description = ''
The job name assigned to scraped metrics by default.
'';
};
scrape_interval = mkOpt types.str ''
How frequently to scrape targets from this job. Defaults to the
globally configured default.
'';
scrape_timeout = mkOpt types.str ''
Per-target timeout when scraping this job. Defaults to the
globally configured default.
'';
metrics_path = mkDefOpt types.str "/metrics" ''
The HTTP resource path on which to fetch metrics from targets.
'';
honor_labels = mkDefOpt types.bool "false" ''
Controls how Prometheus handles conflicts between labels
that are already present in scraped data and labels that
Prometheus would attach server-side ("job" and "instance"
labels, manually configured target labels, and labels
generated by service discovery implementations).
If honor_labels is set to "true", label conflicts are
resolved by keeping label values from the scraped data and
ignoring the conflicting server-side labels.
If honor_labels is set to "false", label conflicts are
resolved by renaming conflicting labels in the scraped data
to "exported_&lt;original-label&gt;" (for example
"exported_instance", "exported_job") and then attaching
server-side labels. This is useful for use cases such as
federation, where all labels specified in the target should
be preserved.
'';
honor_timestamps = mkDefOpt types.bool "true" ''
honor_timestamps controls whether Prometheus respects the timestamps present
in scraped data.
If honor_timestamps is set to <literal>true</literal>, the timestamps of the metrics exposed
by the target will be used.
If honor_timestamps is set to <literal>false</literal>, the timestamps of the metrics exposed
by the target will be ignored.
'';
scheme = mkDefOpt (types.enum ["http" "https"]) "http" ''
The URL scheme with which to fetch metrics from targets.
'';
params = mkOpt (types.attrsOf (types.listOf types.str)) ''
Optional HTTP URL parameters.
'';
basic_auth = mkOpt (types.submodule {
options = {
username = mkOption {
type = types.str;
description = ''
HTTP username
'';
};
password = mkOption {
type = types.str;
description = ''
HTTP password
'';
};
};
}) ''
Optional http login credentials for metrics scraping.
'';
bearer_token = mkOpt types.str ''
Sets the `Authorization` header on every scrape request with
the configured bearer token. It is mutually exclusive with
<option>bearer_token_file</option>.
'';
bearer_token_file = mkOpt types.str ''
Sets the `Authorization` header on every scrape request with
the bearer token read from the configured file. It is mutually
exclusive with <option>bearer_token</option>.
'';
tls_config = mkOpt promTypes.tls_config ''
Configures the scrape request's TLS settings.
'';
proxy_url = mkOpt types.str ''
Optional proxy URL.
'';
ec2_sd_configs = mkOpt (types.listOf promTypes.ec2_sd_config) ''
List of EC2 service discovery configurations.
'';
dns_sd_configs = mkOpt (types.listOf promTypes.dns_sd_config) ''
List of DNS service discovery configurations.
'';
consul_sd_configs = mkOpt (types.listOf promTypes.consul_sd_config) ''
List of Consul service discovery configurations.
'';
file_sd_configs = mkOpt (types.listOf promTypes.file_sd_config) ''
List of file service discovery configurations.
'';
static_configs = mkOpt (types.listOf promTypes.static_config) ''
List of labeled target groups for this job.
'';
relabel_configs = mkOpt (types.listOf promTypes.relabel_config) ''
List of relabel configurations.
'';
sample_limit = mkDefOpt types.int "0" ''
Per-scrape limit on number of scraped samples that will be accepted.
If more than this number of samples are present after metric relabelling
the entire scrape will be treated as failed. 0 means no limit.
'';
};
};
promTypes.static_config = types.submodule {
options = {
targets = mkOption {
type = types.listOf types.str;
description = ''
The targets specified by the target group.
'';
};
labels = mkOption {
type = types.attrsOf types.str;
default = {};
description = ''
Labels assigned to all metrics scraped from the targets.
'';
};
};
};
promTypes.ec2_sd_config = types.submodule {
options = {
region = mkOption {
type = types.str;
description = ''
The AWS Region.
'';
};
endpoint = mkOpt types.str ''
Custom endpoint to be used.
'';
access_key = mkOpt types.str ''
The AWS API key id. If blank, the environment variable
<literal>AWS_ACCESS_KEY_ID</literal> is used.
'';
secret_key = mkOpt types.str ''
The AWS API key secret. If blank, the environment variable
<literal>AWS_SECRET_ACCESS_KEY</literal> is used.
'';
profile = mkOpt types.str ''
Named AWS profile used to connect to the API.
'';
role_arn = mkOpt types.str ''
AWS Role ARN, an alternative to using AWS API keys.
'';
refresh_interval = mkDefOpt types.str "60s" ''
Refresh interval to re-read the instance list.
'';
port = mkDefOpt types.int "80" ''
The port to scrape metrics from. If using the public IP
address, this must instead be specified in the relabeling
rule.
'';
filters = mkOpt (types.listOf promTypes.filter) ''
Filters can be used optionally to filter the instance list by other criteria.
'';
};
};
promTypes.filter = types.submodule {
options = {
name = mkOption {
type = types.str;
description = ''
See <link xlink:href="https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html">this list</link>
for the available filters.
'';
};
value = mkOption {
type = types.listOf types.str;
default = [];
description = ''
Value of the filter.
'';
};
};
};
promTypes.dns_sd_config = types.submodule {
options = {
names = mkOption {
type = types.listOf types.str;
description = ''
A list of DNS SRV record names to be queried.
'';
};
refresh_interval = mkDefOpt types.str "30s" ''
The time after which the provided names are refreshed.
'';
};
};
promTypes.consul_sd_config = types.submodule {
options = {
server = mkDefOpt types.str "localhost:8500" ''
Consul server to query.
'';
token = mkOpt types.str "Consul token";
datacenter = mkOpt types.str "Consul datacenter";
scheme = mkDefOpt types.str "http" "Consul scheme";
username = mkOpt types.str "Consul username";
password = mkOpt types.str "Consul password";
tls_config = mkOpt promTypes.tls_config ''
Configures the Consul request's TLS settings.
'';
services = mkOpt (types.listOf types.str) ''
A list of services for which targets are retrieved.
'';
tags = mkOpt (types.listOf types.str) ''
An optional list of tags used to filter nodes for a given
service. Services must contain all tags in the list.
'';
node_meta = mkOpt (types.attrsOf types.str) ''
Node metadata used to filter nodes for a given service.
'';
tag_separator = mkDefOpt types.str "," ''
The string by which Consul tags are joined into the tag label.
'';
allow_stale = mkOpt types.bool ''
Allow stale Consul results
(see <link xlink:href="https://www.consul.io/api/index.html#consistency-modes"/>).
Will reduce load on Consul.
'';
refresh_interval = mkDefOpt types.str "30s" ''
The time after which the provided names are refreshed.
On large setup it might be a good idea to increase this value
because the catalog will change all the time.
'';
};
};
promTypes.file_sd_config = types.submodule {
options = {
files = mkOption {
type = types.listOf types.str;
description = ''
Patterns for files from which target groups are extracted. Refer
to the Prometheus documentation for permitted filename patterns
and formats.
'';
};
refresh_interval = mkDefOpt types.str "5m" ''
Refresh interval to re-read the files.
'';
};
};
promTypes.relabel_config = types.submodule {
options = {
source_labels = mkOpt (types.listOf types.str) ''
The source labels select values from existing labels. Their content
is concatenated using the configured separator and matched against
the configured regular expression.
'';
separator = mkDefOpt types.str ";" ''
Separator placed between concatenated source label values.
'';
target_label = mkOpt types.str ''
Label to which the resulting value is written in a replace action.
It is mandatory for replace actions.
'';
regex = mkDefOpt types.str "(.*)" ''
Regular expression against which the extracted value is matched.
'';
modulus = mkOpt types.int ''
Modulus to take of the hash of the source label values.
'';
replacement = mkDefOpt types.str "$1" ''
Replacement value against which a regex replace is performed if the
regular expression matches.
'';
action = mkDefOpt (types.enum ["replace" "keep" "drop"]) "replace" ''
Action to perform based on regex matching.
'';
};
};
promTypes.tls_config = types.submodule {
options = {
ca_file = mkOpt types.str ''
CA certificate to validate API server certificate with.
'';
cert_file = mkOpt types.str ''
Certificate file for client cert authentication to the server.
'';
key_file = mkOpt types.str ''
Key file for client cert authentication to the server.
'';
server_name = mkOpt types.str ''
ServerName extension to indicate the name of the server.
http://tools.ietf.org/html/rfc4366#section-3.1
'';
insecure_skip_verify = mkOpt types.bool ''
Disable validation of the server certificate.
'';
};
};
in {
imports = [
(mkRenamedOptionModule [ "services" "prometheus2" ] [ "services" "prometheus" ])
];
options.services.prometheus = {
enable = mkOption {
type = types.bool;
default = false;
description = ''
Enable the Prometheus monitoring daemon.
'';
};
package = mkOption {
type = types.package;
default = pkgs.prometheus;
defaultText = "pkgs.prometheus";
description = ''
The prometheus package that should be used.
'';
};
port = mkOption {
type = types.port;
default = 9090;
description = ''
Port to listen on.
'';
};
listenAddress = mkOption {
type = types.str;
default = "0.0.0.0";
description = ''
Address to listen on for the web interface, API, and telemetry.
'';
};
stateDir = mkOption {
type = types.str;
default = "prometheus2";
description = ''
Directory below <literal>/var/lib</literal> to store Prometheus metrics data.
This directory will be created automatically using systemd's StateDirectory mechanism.
'';
};
extraFlags = mkOption {
type = types.listOf types.str;
default = [];
description = ''
Extra commandline options when launching Prometheus.
'';
};
configText = mkOption {
type = types.nullOr types.lines;
default = null;
description = ''
If non-null, this option defines the text that is written to
prometheus.yml. If null, the contents of prometheus.yml is generated
from the structured config options.
'';
};
globalConfig = mkOption {
type = promTypes.globalConfig;
default = {};
description = ''
Parameters that are valid in all configuration contexts. They
also serve as defaults for other configuration sections
'';
};
rules = mkOption {
type = types.listOf types.str;
default = [];
description = ''
Alerting and/or Recording rules to evaluate at runtime.
'';
};
ruleFiles = mkOption {
type = types.listOf types.path;
default = [];
description = ''
Any additional rules files to include in this configuration.
'';
};
scrapeConfigs = mkOption {
type = types.listOf promTypes.scrape_config;
default = [];
description = ''
A list of scrape configurations.
'';
};
alertmanagers = mkOption {
type = types.listOf types.attrs;
example = literalExample ''
[ {
scheme = "https";
path_prefix = "/alertmanager";
static_configs = [ {
targets = [
"prometheus.domain.tld"
];
} ];
} ]
'';
default = [];
description = ''
A list of alertmanagers to send alerts to.
See <link xlink:href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config">the official documentation</link> for more information.
'';
};
alertmanagerNotificationQueueCapacity = mkOption {
type = types.int;
default = 10000;
description = ''
The capacity of the queue for pending alert manager notifications.
'';
};
alertmanagerTimeout = mkOption {
type = types.int;
default = 10;
description = ''
Alert manager HTTP API timeout (in seconds).
'';
};
webExternalUrl = mkOption {
type = types.nullOr types.str;
default = null;
example = "https://example.com/";
description = ''
The URL under which Prometheus is externally reachable (for example,
if Prometheus is served via a reverse proxy).
'';
};
checkConfig = mkOption {
type = types.bool;
default = true;
description = ''
Check configuration with <literal>promtool
check</literal>. The call to <literal>promtool</literal> is
subject to sandboxing by Nix. When credentials are stored in
external files (<literal>password_file</literal>,
<literal>bearer_token_file</literal>, etc), they will not be
visible to <literal>promtool</literal> and it will report
errors, despite a correct configuration.
'';
};
retentionTime = mkOption {
type = types.nullOr types.str;
default = null;
example = "15d";
description = ''
How long to retain samples in storage.
'';
};
};
config = mkIf cfg.enable {
assertions = [
( let
# Match something with dots (an IPv4 address) or something ending in
# a square bracket (an IPv6 addresses) followed by a port number.
legacy = builtins.match "(.*\\..*|.*]):([[:digit:]]+)" cfg.listenAddress;
in {
assertion = legacy == null;
message = ''
Do not specify the port for Prometheus to listen on in the
listenAddress option; use the port option instead:
services.prometheus.listenAddress = ${builtins.elemAt legacy 0};
services.prometheus.port = ${builtins.elemAt legacy 1};
'';
}
)
];
users.groups.prometheus.gid = config.ids.gids.prometheus;
users.users.prometheus = {
description = "Prometheus daemon user";
uid = config.ids.uids.prometheus;
group = "prometheus";
};
systemd.services.prometheus = {
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
serviceConfig = {
ExecStart = "${cfg.package}/bin/prometheus" +
optionalString (length cmdlineArgs != 0) (" \\\n " +
concatStringsSep " \\\n " cmdlineArgs);
User = "prometheus";
Restart = "always";
WorkingDirectory = workingDir;
StateDirectory = cfg.stateDir;
};
};
};
}