From 4de8a48fcdcd22ce4879f1fa6e9863edb1ee655e Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 14 Aug 2024 02:59:33 +0200
Subject: [PATCH] cluster/services/patroni: test in simulacrum

---
# Reviewer notes. This area sits after the three-dash separator and before the
# first "diff --git" header, so it is not part of the commit message and not
# part of any hunk.
#
# What the patch changes:
#   * default.nix: adds ./simulacrum/test-data.nix to `imports`, and replaces
#     the single `simulacrum.deps = [ ... ]` assignment with an attribute set
#     that sets `enable = true`, keeps the same three deps (consul,
#     incandescence, locksmith) and points `settings` at ./simulacrum/test.nix.
#   * simulacrum/test-data.nix (new): wrapped in `lib.mkIf config.simulacrum`;
#     declares a database `testdb` owned by `testuser`, and a user `testuser`
#     whose locksmith entry targets the patroni haproxy nodes with format
#     "pgpass".
#   * simulacrum/test.nix (new): `defaults` installs jq and
#     depot.packages.postgresql and adds one pg_hba line
#     ("host postgres postgres 0.0.0.0/0 trust"); `testScript` is Python that
#       1. waits until every booted worker reports one running Leader and the
#          expected number of streaming Replicas, and every booted client can
#          run `select 1` through the patroni-pg-access link,
#       2. creates table `dummy` (1..100) and checks the distinct count from
#          every booted client,
#       3. crashes all workers, starts them again and waits for readiness,
#       4. moves the current Leader to the end of `nodes`, then for each
#          worker in turn: crash, wait for 1 replica, run insert/select/delete
#          queries, start it again, wait for full readiness, run the queries
#          again,
#       5. as `testuser` with PGPASSFILE=/run/locksmith/patroni-testuser,
#          expects table creation in `testdb` to succeed and `select 1`
#          against database `postgres` to fail.
#
# NOTE(review): the last subtest iterates `clients` directly, while every other
#   client loop goes through `booted(clients)` -- confirm this is intentional.
# NOTE(review): `client.name` is interpolated unquoted into the table name
#   `test_table_{client.name}`; presumably all client machine names are valid
#   unquoted SQL identifiers -- verify against the cluster's node names.
# NOTE(review): `masterNodeIndex = next(...)` has no default, so it presumably
#   relies on the Patroni member name being equal to the test machine name --
#   TODO confirm.
# NOTE(review): this copy of the patch arrived with its whitespace collapsed;
#   line breaks and leading indentation below were reconstructed. Hunk line
#   counts match the headers, but verify context-line whitespace against the
#   tree before applying.

 cluster/services/patroni/default.nix          |  7 +-
 .../services/patroni/simulacrum/test-data.nix | 14 +++
 cluster/services/patroni/simulacrum/test.nix  | 91 +++++++++++++++++++
 3 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 cluster/services/patroni/simulacrum/test-data.nix
 create mode 100644 cluster/services/patroni/simulacrum/test.nix

diff --git a/cluster/services/patroni/default.nix b/cluster/services/patroni/default.nix
index b7ebe6f..0d9c977 100644
--- a/cluster/services/patroni/default.nix
+++ b/cluster/services/patroni/default.nix
@@ -4,6 +4,7 @@
   imports = [
     ./options.nix
     ./incandescence.nix
+    ./simulacrum/test-data.nix
   ];
 
   links = {
@@ -36,6 +37,10 @@
       PATRONI_REWIND_PASSWORD = default;
       metricsCredentials.nodes = nodes.worker;
     };
-    simulacrum.deps = [ "consul" "incandescence" "locksmith" ];
+    simulacrum = {
+      enable = true;
+      deps = [ "consul" "incandescence" "locksmith" ];
+      settings = ./simulacrum/test.nix;
+    };
   };
 }
diff --git a/cluster/services/patroni/simulacrum/test-data.nix b/cluster/services/patroni/simulacrum/test-data.nix
new file mode 100644
index 0000000..e56e862
--- /dev/null
+++ b/cluster/services/patroni/simulacrum/test-data.nix
@@ -0,0 +1,14 @@
+{ config, lib, ... }:
+{
+  patroni = lib.mkIf config.simulacrum {
+    databases = config.lib.forService "patroni" {
+      testdb.owner = "testuser";
+    };
+    users = config.lib.forService "patroni" {
+      testuser.locksmith = {
+        nodes = config.services.patroni.nodes.haproxy;
+        format = "pgpass";
+      };
+    };
+  };
+}
diff --git a/cluster/services/patroni/simulacrum/test.nix b/cluster/services/patroni/simulacrum/test.nix
new file mode 100644
index 0000000..6b6b3f8
--- /dev/null
+++ b/cluster/services/patroni/simulacrum/test.nix
@@ -0,0 +1,91 @@
+{ cluster, ... }:
+
+let
+  clusterName = "poseidon";
+  link = cluster.config.links.patroni-pg-access;
+in
+{
+  defaults = { depot, pkgs, ... }: {
+    environment.systemPackages = [
+      pkgs.jq
+      depot.packages.postgresql
+    ];
+    services.patroni.settings.postgresql.pg_hba = [
+      "host postgres postgres 0.0.0.0/0 trust"
+    ];
+  };
+
+  # taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
+  testScript = ''
+    import json
+    nodeNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
+    clientNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
+    nodes = [ n for n in machines if n.name in nodeNames ]
+    clients = [ n for n in machines if n.name in clientNames ]
+
+    def booted(nodes):
+        return filter(lambda node: node.booted, nodes)
+
+    def wait_for_all_nodes_ready(expected_replicas=2):
+        booted_nodes = booted(nodes)
+        for node in booted_nodes:
+            node.wait_for_unit("patroni.service")
+            print(node.succeed("patronictl list ${clusterName}"))
+            node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'length') == {expected_replicas + 1} ]")
+            node.wait_until_succeeds("[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
+            node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
+            print(node.succeed("patronictl list ${clusterName}"))
+        for client in booted(clients):
+            client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")
+
+    def run_dummy_queries():
+        for client in booted(clients):
+            client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
+            client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
+            client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
+
+    start_all()
+
+    with subtest("should bootstrap a new patroni cluster"):
+        wait_for_all_nodes_ready()
+
+    with subtest("should be able to insert and select"):
+        booted_clients = list(booted(clients))
+        booted_clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
+        for client in booted_clients:
+            client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
+
+    with subtest("should restart after all nodes are crashed"):
+        for node in nodes:
+            node.crash()
+        for node in nodes:
+            node.start()
+        wait_for_all_nodes_ready()
+
+    with subtest("should be able to run queries while any one node is crashed"):
+        masterNodeName = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
+        masterNodeIndex = next((i for i, v in enumerate(nodes) if v.name == masterNodeName))
+
+        # Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
+        nodes.append(nodes.pop(masterNodeIndex))
+
+        for node in nodes:
+            node.crash()
+            wait_for_all_nodes_ready(1)
+
+            # Execute some queries while a node is down.
+            run_dummy_queries()
+
+            # Restart crashed node.
+            node.start()
+            wait_for_all_nodes_ready()
+
+            # Execute some queries with the node back up.
+            run_dummy_queries()
+
+    with subtest("should create databases and users via incandescence"):
+        for client in clients:
+            client.succeed(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d testdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
+            client.fail(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d postgres --command='select 1;'")
+  '';
+}