# NixOS VM test for the Patroni-managed PostgreSQL cluster.
#
# The machines under test are taken from the cluster configuration passed in
# as `cluster`: the Patroni members are the machines named in
# `services.patroni.nodes.worker`, and the machines named in
# `services.patroni.nodes.haproxy` act as SQL clients. Clients talk to
# PostgreSQL through the address and port of the `patroni-pg-access` link.
{ cluster, ... }:

let
  # Cluster name handed to every `patronictl list` invocation in the test script.
  clusterName = "poseidon";

  # Endpoint (ipv4 / portStr) the clients use for all psql connections.
  link = cluster.config.links.patroni-pg-access;
in
{
  # Extra configuration applied on top of the machines for the purpose of this test.
  defaults = { depot, pkgs, ... }: {
    # jq is used by the test script to inspect `patronictl list -f json` output;
    # the postgresql package provides psql for the client-side queries.
    environment.systemPackages = [
      pkgs.jq
      depot.packages.postgresql
    ];

    services.patroni.settings.postgresql.pg_hba = [
      # Unsafe, do not use for anything other than tests
      "host all all 0.0.0.0/0 trust"
    ];
  };

  # taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
  testScript = ''
    import json

    # Machine names are injected from the cluster configuration at evaluation time.
    node_names = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
    client_names = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
    nodes = [n for n in machines if n.name in node_names]
    clients = [n for n in machines if n.name in client_names]


    def wait_for_all_nodes_ready(expected_replicas=None):
        """Wait until every booted node reports a healthy cluster and every client can connect.

        expected_replicas is the number of streaming replicas to wait for.
        When omitted, every Patroni node except the leader is expected to be
        a streaming replica.
        """
        if expected_replicas is None:
            expected_replicas = len(nodes) - 1
        for node in nodes:
            # Crashed machines cannot be queried; only ask the ones that are up.
            if not node.booted:
                continue
            print(node.succeed("patronictl list ${clusterName}"))
            # Total member count: the replicas plus the leader.
            node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'length') == {expected_replicas + 1} ]")
            # Exactly one running leader.
            node.wait_until_succeeds("[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
            # All expected replicas are streaming.
            node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
            print(node.succeed("patronictl list ${clusterName}"))
        for client in clients:
            client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")


    def run_dummy_queries():
        """Insert, read back and delete a marker row in `dummy` from every client."""
        for client in clients:
            client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
            client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
            client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")


    start_all()

    with subtest("should bootstrap a new patroni cluster"):
        wait_for_all_nodes_ready()

    with subtest("should be able to insert and select"):
        clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
        for client in clients:
            client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")

    with subtest("should restart after all nodes are crashed"):
        for node in nodes:
            node.crash()
        for node in nodes:
            node.start()
        wait_for_all_nodes_ready()

    with subtest("should be able to run queries while any one node is crashed"):
        leader_name = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
        leader_index = next((i for i, v in enumerate(nodes) if v.name == leader_name), None)
        # Fail with a readable message instead of a TypeError from list.pop(None).
        assert leader_index is not None, f"leader {leader_name!r} is not one of the Patroni nodes"
        # Move the leader to the end of the list to avoid multiple failovers (makes the test faster and more consistent)
        nodes.append(nodes.pop(leader_index))
        for node in nodes:
            node.crash()
            # One member is down, so one replica fewer than in the full cluster.
            wait_for_all_nodes_ready(len(nodes) - 2)

            # Execute some queries while a node is down.
            run_dummy_queries()

            # Restart crashed node.
            node.start()
            wait_for_all_nodes_ready()

            # Execute some queries with the node back up.
            run_dummy_queries()
  '';
}