cluster/services/patroni: test in simulacrum

This commit is contained in:
Max Headroom 2024-08-14 02:59:33 +02:00
parent c57976a299
commit 4de8a48fcd
3 changed files with 111 additions and 1 deletions

View file

@ -4,6 +4,7 @@
imports = [ imports = [
./options.nix ./options.nix
./incandescence.nix ./incandescence.nix
./simulacrum/test-data.nix
]; ];
links = { links = {
@ -36,6 +37,10 @@
PATRONI_REWIND_PASSWORD = default; PATRONI_REWIND_PASSWORD = default;
metricsCredentials.nodes = nodes.worker; metricsCredentials.nodes = nodes.worker;
}; };
simulacrum.deps = [ "consul" "incandescence" "locksmith" ]; simulacrum = {
enable = true;
deps = [ "consul" "incandescence" "locksmith" ];
settings = ./simulacrum/test.nix;
};
}; };
} }

View file

@ -0,0 +1,14 @@
{ config, lib, ... }:
{
patroni = lib.mkIf config.simulacrum {
databases = config.lib.forService "patroni" {
testdb.owner = "testuser";
};
users = config.lib.forService "patroni" {
testuser.locksmith = {
nodes = config.services.patroni.nodes.haproxy;
format = "pgpass";
};
};
};
}

View file

@ -0,0 +1,91 @@
{ cluster, ... }:
let
clusterName = "poseidon";
link = cluster.config.links.patroni-pg-access;
in
{
defaults = { depot, pkgs, ... }: {
environment.systemPackages = [
pkgs.jq
depot.packages.postgresql
];
services.patroni.settings.postgresql.pg_hba = [
"host postgres postgres 0.0.0.0/0 trust"
];
};
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
testScript = ''
import json
nodeNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
clientNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
nodes = [ n for n in machines if n.name in nodeNames ]
clients = [ n for n in machines if n.name in clientNames ]
def booted(nodes):
return filter(lambda node: node.booted, nodes)
def wait_for_all_nodes_ready(expected_replicas=2):
booted_nodes = booted(nodes)
for node in booted_nodes:
node.wait_for_unit("patroni.service")
print(node.succeed("patronictl list ${clusterName}"))
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'length') == {expected_replicas + 1} ]")
node.wait_until_succeeds("[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
print(node.succeed("patronictl list ${clusterName}"))
for client in booted(clients):
client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")
def run_dummy_queries():
for client in booted(clients):
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
start_all()
with subtest("should bootstrap a new patroni cluster"):
wait_for_all_nodes_ready()
with subtest("should be able to insert and select"):
booted_clients = list(booted(clients))
booted_clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
for client in booted_clients:
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
with subtest("should restart after all nodes are crashed"):
for node in nodes:
node.crash()
for node in nodes:
node.start()
wait_for_all_nodes_ready()
with subtest("should be able to run queries while any one node is crashed"):
masterNodeName = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
masterNodeIndex = next((i for i, v in enumerate(nodes) if v.name == masterNodeName))
# Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
nodes.append(nodes.pop(masterNodeIndex))
for node in nodes:
node.crash()
wait_for_all_nodes_ready(1)
# Execute some queries while a node is down.
run_dummy_queries()
# Restart crashed node.
node.start()
wait_for_all_nodes_ready()
# Execute some queries with the node back up.
run_dummy_queries()
with subtest("should create databases and users via incandescence"):
for client in clients:
client.succeed(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d testdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
client.fail(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d postgres --command='select 1;'")
'';
}