nixpkgs/nixos/tests/patroni.nix
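# To run just this test from a nixpkgs checkout (assuming it is exposed as
# nixosTests.patroni, the usual convention for files under nixos/tests):
#
#   nix-build -A nixosTests.patroni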

import ./make-test-python.nix (
  { pkgs, lib, ... }:
  let
    nodesIps = [
      "192.168.1.1"
      "192.168.1.2"
      "192.168.1.3"
    ];
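
    # Builds the NixOS configuration for one Patroni/PostgreSQL node,
    # identified by its index into nodesIps.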
    createNode =
      index:
      { pkgs, ... }:
      let
        ip = builtins.elemAt nodesIps index; # since we already use IPs to identify servers
      in
      {
        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
          {
            address = ip;
            prefixLength = 16;
          }
        ];
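
        # 5432: PostgreSQL, 8008: Patroni REST API; 5010 is presumably the
        # Patroni module's default Raft port.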
        networking.firewall.allowedTCPPorts = [
          5432
          8008
          5010
        ];
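
        # jq is used by the test script to parse patronictl's JSON output.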
        environment.systemPackages = [ pkgs.jq ];

        services.patroni = {
          enable = true;

          postgresqlPackage = pkgs.postgresql_14.withPackages (p: [ p.pg_safeupdate ]);

          scope = "cluster1";
          name = "node${toString (index + 1)}";
          nodeIp = ip;
          otherNodesIps = builtins.filter (h: h != ip) nodesIps;
          softwareWatchdog = true;

          settings = {
            bootstrap = {
              dcs = {
                ttl = 30;
                loop_wait = 10;
                retry_timeout = 10;
                maximum_lag_on_failover = 1048576;
              };
              initdb = [
                { encoding = "UTF8"; }
                "data-checksums"
              ];
            };

            postgresql = {
              use_pg_rewind = true;
              use_slots = true;
              authentication = {
                replication = {
                  username = "replicator";
                };
                superuser = {
                  username = "postgres";
                };
                rewind = {
                  username = "rewind";
                };
              };
              parameters = {
                listen_addresses = "${ip}";
                wal_level = "replica";
                hot_standby_feedback = "on";
                unix_socket_directories = "/tmp";
              };
              pg_hba = [
                "host replication replicator 192.168.1.0/24 md5"
                # Unsafe, do not use for anything other than tests
                "host all all 0.0.0.0/0 trust"
              ];
            };

            etcd3 = {
              host = "192.168.1.4:2379";
            };
          };
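
          # Credentials are passed to Patroni via environment files; plain-text
          # "postgres" passwords are only acceptable because this is a throwaway test VM.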
          environmentFiles = {
            PATRONI_REPLICATION_PASSWORD = pkgs.writeText "replication-password" "postgres";
            PATRONI_SUPERUSER_PASSWORD = pkgs.writeText "superuser-password" "postgres";
            PATRONI_REWIND_PASSWORD = pkgs.writeText "rewind-password" "postgres";
          };
        };

        # We always want to restart so the tests never hang
        systemd.services.patroni.serviceConfig.StartLimitIntervalSec = 0;
      };
  in
  {
    name = "patroni";

    nodes = {
      node1 = createNode 0;
      node2 = createNode 1;
      node3 = createNode 2;
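
      # Single-member etcd cluster acting as the DCS (distributed configuration
      # store) that Patroni uses for leader election.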
      etcd =
        { pkgs, ... }:
        {
          networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
            {
              address = "192.168.1.4";
              prefixLength = 16;
            }
          ];
          services.etcd = {
            enable = true;
            listenClientUrls = [ "http://192.168.1.4:2379" ];
          };
          networking.firewall.allowedTCPPorts = [ 2379 ];
        };
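
      # Client machine running HAProxy: connections to 127.0.0.1:5432 are routed
      # to whichever node answers the health check on Patroni's REST API
      # (port 8008) with HTTP 200, i.e. normally the current primary.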
      client =
        { pkgs, ... }:
        {
          environment.systemPackages = [ pkgs.postgresql_14 ];
          networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
            {
              address = "192.168.2.1";
              prefixLength = 16;
            }
          ];
          services.haproxy = {
            enable = true;
            config = ''
              global
                maxconn 100
              defaults
                log global
                mode tcp
                retries 2
                timeout client 30m
                timeout connect 4s
                timeout server 30m
                timeout check 5s
              listen cluster1
                bind 127.0.0.1:5432
                option httpchk
                http-check expect status 200
                default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
                ${builtins.concatStringsSep "\n" (
                  map (ip: "server postgresql_${ip}_5432 ${ip}:5432 maxconn 100 check port 8008") nodesIps
                )}
            '';
          };
        };
    };

    testScript = ''
      nodes = [node1, node2, node3]
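
      # Wait until patronictl reports exactly one running leader and the expected
      # number of streaming replicas, and until the client can reach PostgreSQL
      # through HAProxy.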
      def wait_for_all_nodes_ready(expected_replicas=2):
          booted_nodes = filter(lambda node: node.booted, nodes)
          for node in booted_nodes:
              print(node.succeed("patronictl list cluster1"))
              node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'length') == {expected_replicas + 1} ]")
              node.wait_until_succeeds("[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
              node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
              print(node.succeed("patronictl list cluster1"))
          client.wait_until_succeeds("psql -h 127.0.0.1 -U postgres --command='select 1;'")
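
      # Insert a row through HAProxy, read it back, and delete it again to prove
      # the cluster is accepting both writes and reads.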
      def run_dummy_queries():
          client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
          client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
          client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")

      start_all()

      etcd.wait_for_unit("etcd.service")

      with subtest("should bootstrap a new patroni cluster"):
          wait_for_all_nodes_ready()

      with subtest("should be able to insert and select"):
          client.succeed("psql -h 127.0.0.1 -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
          client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")

      with subtest("should restart after all nodes are crashed"):
          for node in nodes:
              node.crash()

          for node in nodes:
              node.start()

          wait_for_all_nodes_ready()

      with subtest("should be able to run queries while any one node is crashed"):
          masterNodeName = node1.succeed("patronictl list -f json cluster1 | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
          masterNodeIndex = int(masterNodeName[len(masterNodeName)-1]) - 1

          # Move the master node to the end of the list to avoid multiple failovers (makes the test faster and more consistent)
          nodes.append(nodes.pop(masterNodeIndex))

          for node in nodes:
              node.crash()
              wait_for_all_nodes_ready(1)

              # Execute some queries while a node is down.
              run_dummy_queries()

              # Restart crashed node.
              node.start()
              wait_for_all_nodes_ready()

              # Execute some queries with the node back up.
              run_dummy_queries()
    '';
  }
)