cluster/services/patroni: test in simulacrum
This commit is contained in:
parent
6ab5ca5abf
commit
cc080c55e4
6 changed files with 171 additions and 3 deletions
|
@ -39,6 +39,9 @@ in
|
|||
fi
|
||||
''))
|
||||
(concatStringsSep "\n")
|
||||
(script: if script == "" then ''
|
||||
echo "Nothing to create"
|
||||
'' else script)
|
||||
];
|
||||
};
|
||||
"ignite-${provider}-${formula}-change" = mkIf (formulaConfig.change != null) {
|
||||
|
@ -58,6 +61,9 @@ in
|
|||
) || echo "Change failed: ${object}"
|
||||
''))
|
||||
(concatStringsSep "\n")
|
||||
(script: if script == "" then ''
|
||||
echo "Nothing to change"
|
||||
'' else script)
|
||||
];
|
||||
};
|
||||
"ignite-${provider}-${formula}-destroy" = {
|
||||
|
|
|
@ -14,6 +14,10 @@
|
|||
./provider.nix
|
||||
];
|
||||
};
|
||||
simulacrum.deps = [ "chant" "consul" ];
|
||||
simulacrum = {
|
||||
enable = true;
|
||||
deps = [ "chant" "consul" ];
|
||||
settings = ./test.nix;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
3
cluster/services/locksmith/test.nix
Normal file
3
cluster/services/locksmith/test.nix
Normal file
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
testScript = '''';
|
||||
}
|
|
@ -36,6 +36,10 @@
|
|||
PATRONI_REWIND_PASSWORD = default;
|
||||
metricsCredentials.nodes = nodes.worker;
|
||||
};
|
||||
simulacrum.deps = [ "consul" "incandescence" "locksmith" ];
|
||||
simulacrum = {
|
||||
enable = true;
|
||||
deps = [ "consul" "incandescence" "locksmith" ];
|
||||
settings = ./test.nix;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
150
cluster/services/patroni/test.nix
Normal file
150
cluster/services/patroni/test.nix
Normal file
|
@ -0,0 +1,150 @@
|
|||
{ cluster, ... }:
|
||||
|
||||
let
|
||||
# Builds a NixOS module for one standalone Patroni test node.
# NOTE(review): `index` is not used in the body, and no reference to
# `createNode` is visible elsewhere in this file — confirm it is still needed.
createNode = index: { pkgs, ... }:
  let
    # All Patroni accounts share the same throwaway password in this test setup.
    passwordFile = role: pkgs.writeText "${role}-password" "postgres";
  in
  {
    networking.firewall.allowedTCPPorts = [ 5432 8008 5010 ];

    environment.systemPackages = [ pkgs.jq ];

    services.patroni = {
      enable = true;
      softwareWatchdog = true;

      environmentFiles = {
        PATRONI_REPLICATION_PASSWORD = passwordFile "replication";
        PATRONI_SUPERUSER_PASSWORD = passwordFile "superuser";
        PATRONI_REWIND_PASSWORD = passwordFile "rewind";
      };

      settings = {
        etcd3.host = "192.168.1.4:2379";

        bootstrap = {
          dcs = {
            ttl = 30;
            loop_wait = 10;
            retry_timeout = 10;
            maximum_lag_on_failover = 1048576;
          };
          initdb = [
            { encoding = "UTF8"; }
            "data-checksums"
          ];
        };

        postgresql = {
          use_pg_rewind = true;
          use_slots = true;

          authentication = {
            replication.username = "replicator";
            superuser.username = "postgres";
            rewind.username = "rewind";
          };

          parameters = {
            wal_level = "replica";
            hot_standby_feedback = "on";
            unix_socket_directories = "/tmp";
          };

          pg_hba = [
            "host replication replicator 192.168.1.0/24 md5"
            # Unsafe, do not use for anything other than tests
            "host all all 0.0.0.0/0 trust"
          ];
        };
      };
    };

    # We always want to restart so the tests never hang
    systemd.services.patroni.serviceConfig.StartLimitIntervalSec = 0;
  };
|
||||
|
||||
clusterName = "poseidon";
|
||||
link = cluster.config.links.patroni-pg-access;
|
||||
in
|
||||
{
|
||||
# Module applied to every machine of the test.
defaults = { depot, pkgs, ... }: {
  # The test script pipes `patronictl list -f json` through jq and runs psql;
  # presumably psql is provided by depot.packages.postgresql — TODO confirm.
  environment.systemPackages = [ pkgs.jq depot.packages.postgresql ];

  services.patroni.settings.postgresql = {
    # Unsafe: passwordless access from any address, for tests only.
    pg_hba = [ "host all all 0.0.0.0/0 trust" ];
  };
};
|
||||
|
||||
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
|
||||
testScript = ''
|
||||
import json
|
||||
nodeNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
|
||||
clientNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
|
||||
nodes = [ n for n in machines if n.name in nodeNames ]
|
||||
clients = [ n for n in machines if n.name in clientNames ]
|
||||
|
||||
def wait_for_all_nodes_ready(expected_replicas=2):
    """Block until the Patroni cluster and all client connections are healthy.

    For every booted machine in `nodes`, wait until `patronictl list` reports
    `expected_replicas + 1` members, exactly one Leader in state `running`
    and `expected_replicas` Replicas in state `streaming`. Afterwards wait
    until every machine in `clients` can run `select 1` through the
    patroni-pg-access link.
    """
    list_cmd = "patronictl list ${clusterName}"
    list_json_cmd = "patronictl list -f json ${clusterName}"
    # (jq filter yielding a count, value that count must reach)
    expected_counts = [
        ("length", expected_replicas + 1),
        ("map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length", 1),
        ("map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length", expected_replicas),
    ]

    for member in nodes:
        # Crashed machines cannot answer; only poll the ones that are up.
        if not member.booted:
            continue
        print(member.succeed(list_cmd))
        for jq_filter, count in expected_counts:
            member.wait_until_succeeds(f"[ $({list_json_cmd} | jq '{jq_filter}') == {count} ]")
        print(member.succeed(list_cmd))

    for pg_client in clients:
        pg_client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")
|
||||
|
||||
def run_dummy_queries():
    """Insert, read back and delete the marker row 101 in `dummy` from every client."""
    # Connection and output flags shared by all three statements.
    psql = "psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only"
    for pg_client in clients:
        pg_client.succeed(f"{psql} --command='insert into dummy(val) values (101);'")
        pg_client.succeed(f"test $({psql} --command='select val from dummy where val = 101;') -eq 101")
        pg_client.succeed(f"{psql} --command='delete from dummy where val = 101;'")
|
||||
|
||||
start_all()
|
||||
|
||||
with subtest("should bootstrap a new patroni cluster"):
|
||||
wait_for_all_nodes_ready()
|
||||
|
||||
with subtest("should be able to insert and select"):
|
||||
clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
|
||||
for client in clients:
|
||||
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
|
||||
|
||||
with subtest("should restart after all nodes are crashed"):
|
||||
for node in nodes:
|
||||
node.crash()
|
||||
for node in nodes:
|
||||
node.start()
|
||||
wait_for_all_nodes_ready()
|
||||
|
||||
with subtest("should be able to run queries while any one node is crashed"):
    # Ask the first node which cluster member currently holds the Leader role.
    masterNodeName = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
    masterNodeIndex = next((i for i, v in enumerate(nodes) if v.name == masterNodeName), None)
    # Fail with a readable message instead of the TypeError that
    # nodes.pop(None) would raise when the leader matches no test machine.
    assert masterNodeIndex is not None, f"Patroni leader {masterNodeName!r} is not one of the test nodes"

    # Move the master node to the end of the list to avoid multiple failovers (makes the test faster and more consistent)
    nodes.append(nodes.pop(masterNodeIndex))

    for node in nodes:
        node.crash()
        wait_for_all_nodes_ready(1)

        # Execute some queries while a node is down.
        run_dummy_queries()

        # Restart crashed node.
        node.start()
        wait_for_all_nodes_ready()

        # Execute some queries with the node back up.
        run_dummy_queries()
|
||||
'';
|
||||
}
|
|
@ -22,7 +22,8 @@ in
|
|||
config.systemd.packages = pipe config.systemd.services [
|
||||
(filterAttrs (_: v: v.distributed.enable))
|
||||
(mapAttrsToList (n: v: let
|
||||
inherit (v.serviceConfig) ExecStart;
|
||||
# inherit (v.serviceConfig) ExecStart;
|
||||
ExecStart = builtins.trace "for service ${n}" v.serviceConfig.ExecStart;
|
||||
|
||||
cfg = v.distributed;
|
||||
|
||||
|
|
Loading…
Reference in a new issue