Compare commits
No commits in common. "8d7d178d9d0ae24fc880a8e227f24b8274eba718" and "54ba01d8cdc5b1bd731e586a80af7881af7854c0" have entirely different histories.
8d7d178d9d
...
54ba01d8cd
12 changed files with 225 additions and 285 deletions
|
@ -6,6 +6,5 @@
|
||||||
nixos.listener = [
|
nixos.listener = [
|
||||||
./listener.nix
|
./listener.nix
|
||||||
];
|
];
|
||||||
simulacrum.deps = [ "consul" ];
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,5 @@
|
||||||
./provider.nix
|
./provider.nix
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
simulacrum.deps = [ "chant" "consul" ];
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,10 +28,6 @@ in
|
||||||
command = mkOption {
|
command = mkOption {
|
||||||
type = types.coercedTo types.package (package: "${package}") types.str;
|
type = types.coercedTo types.package (package: "${package}") types.str;
|
||||||
};
|
};
|
||||||
checkUpdate = mkOption {
|
|
||||||
type = types.coercedTo types.package (package: "${package}") types.str;
|
|
||||||
default = "true";
|
|
||||||
};
|
|
||||||
owner = mkOption {
|
owner = mkOption {
|
||||||
type = types.str;
|
type = types.str;
|
||||||
default = "root";
|
default = "root";
|
||||||
|
@ -76,27 +72,20 @@ in
|
||||||
activeNodes = lib.unique (lib.flatten (lib.mapAttrsToList (_: secret: secret.nodes) activeSecrets));
|
activeNodes = lib.unique (lib.flatten (lib.mapAttrsToList (_: secret: secret.nodes) activeSecrets));
|
||||||
secretNames = map (name: "${providerRoot}-${name}/") (lib.attrNames activeSecrets);
|
secretNames = map (name: "${providerRoot}-${name}/") (lib.attrNames activeSecrets);
|
||||||
|
|
||||||
createSecret = { path, nodes, owner, mode, group, command, checkUpdate }: ''
|
createSecret = { path, nodes, owner, mode, group, command }: ''
|
||||||
if (${checkUpdate}); then
|
|
||||||
consul kv put ${lib.escapeShellArg path}/mode ${lib.escapeShellArg mode}
|
consul kv put ${lib.escapeShellArg path}/mode ${lib.escapeShellArg mode}
|
||||||
consul kv put ${lib.escapeShellArg path}/owner ${lib.escapeShellArg owner}
|
consul kv put ${lib.escapeShellArg path}/owner ${lib.escapeShellArg owner}
|
||||||
consul kv put ${lib.escapeShellArg path}/group ${lib.escapeShellArg group}
|
consul kv put ${lib.escapeShellArg path}/group ${lib.escapeShellArg group}
|
||||||
secret="$(mktemp -ut)"
|
|
||||||
(${command}) > "$secret"
|
|
||||||
${lib.concatStringsSep "\n" (map (node: ''
|
${lib.concatStringsSep "\n" (map (node: ''
|
||||||
consul kv put ${lib.escapeShellArg path}/recipient/${node} "$(age < "$secret" --encrypt --armor -r ${lib.escapeShellArg depot.hours.${node}.ssh.id.publicKey})"
|
consul kv put ${lib.escapeShellArg path}/recipient/${node} "$( (${command}) | age --encrypt --armor -r ${lib.escapeShellArg depot.hours.${node}.ssh.id.publicKey})"
|
||||||
'') nodes)}
|
'') nodes)}
|
||||||
else
|
|
||||||
echo Skipping update for ${lib.escapeShellArg path}
|
|
||||||
fi
|
|
||||||
'';
|
'';
|
||||||
in ''
|
in ''
|
||||||
# create/update secrets
|
# create/update secrets
|
||||||
umask 77
|
|
||||||
${lib.pipe activeSecrets [
|
${lib.pipe activeSecrets [
|
||||||
(lib.mapAttrsToList (secretName: secretConfig: createSecret {
|
(lib.mapAttrsToList (secretName: secretConfig: createSecret {
|
||||||
path = "${providerRoot}-${secretName}";
|
path = "${providerRoot}-${secretName}";
|
||||||
inherit (secretConfig) nodes mode owner group command checkUpdate;
|
inherit (secretConfig) nodes mode owner group command;
|
||||||
}))
|
}))
|
||||||
(lib.concatStringsSep "\n")
|
(lib.concatStringsSep "\n")
|
||||||
]}
|
]}
|
||||||
|
|
|
@ -1,91 +0,0 @@
|
||||||
{ cluster, config, lib, pkgs, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
inherit (cluster.config.services.patroni) secrets;
|
|
||||||
|
|
||||||
patroni = cluster.config.links.patroni-pg-access;
|
|
||||||
|
|
||||||
cfg = cluster.config.patroni;
|
|
||||||
|
|
||||||
writeQueryFile = pkgs.writeText "patroni-query.sql";
|
|
||||||
|
|
||||||
psqlRunFile = file: ''
|
|
||||||
export PGPASSWORD="$(< ${secrets.PATRONI_SUPERUSER_PASSWORD.path})"
|
|
||||||
while ! ${config.services.patroni.postgresqlPackage}/bin/psql 'host=${patroni.ipv4} port=${patroni.portStr} dbname=postgres user=postgres' --tuples-only --csv --file="${file}"; do
|
|
||||||
sleep 3
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
|
|
||||||
psql = query: psqlRunFile (writeQueryFile query);
|
|
||||||
|
|
||||||
psqlSecret = getSecret: queryTemplate: let
|
|
||||||
queryTemplateFile = writeQueryFile queryTemplate;
|
|
||||||
in ''
|
|
||||||
umask 77
|
|
||||||
secretFile="$(mktemp -ut patroniSecret.XXXXXXXXXXXXXXXX)"
|
|
||||||
queryFile="$(mktemp -ut patroniQuery.XXXXXXXXXXXXXXXX)"
|
|
||||||
trap "rm -f $secretFile $queryFile" EXIT
|
|
||||||
${getSecret} > "$secretFile"
|
|
||||||
cp --no-preserve=mode ${queryTemplateFile} "$queryFile"
|
|
||||||
${pkgs.replace-secret}/bin/replace-secret '@SECRET@' "$secretFile" "$queryFile"
|
|
||||||
${psqlRunFile "$queryFile"}
|
|
||||||
'';
|
|
||||||
|
|
||||||
genPassword = pkgs.writeShellScript "patroni-generate-user-password" ''
|
|
||||||
umask 77
|
|
||||||
base64 -w0 /dev/urandom | tr -d /+ | head -c256 | tee "/run/keys/locksmith-provider-patroni-$1"
|
|
||||||
'';
|
|
||||||
in
|
|
||||||
|
|
||||||
{
|
|
||||||
services.incandescence.providers.patroni = lib.mkIf config.services.haproxy.enable {
|
|
||||||
locksmith = true;
|
|
||||||
wantedBy = [ "patroni.service" "multi-user.target" ];
|
|
||||||
partOf = [ "patroni.service" ];
|
|
||||||
wants = [ "postgresql.service" ];
|
|
||||||
after = [ "postgresql.service" ];
|
|
||||||
|
|
||||||
formulae = {
|
|
||||||
user = {
|
|
||||||
destroyAfterDays = 0;
|
|
||||||
create = user: psqlSecret "${genPassword} ${user}" ''
|
|
||||||
CREATE USER ${user} PASSWORD '@SECRET@';
|
|
||||||
'';
|
|
||||||
destroy = psqlSecret "printenv OBJECT" ''
|
|
||||||
DROP USER @SECRET@;
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
database = {
|
|
||||||
destroyAfterDays = 30;
|
|
||||||
deps = [ "user" ];
|
|
||||||
create = db: psql ''
|
|
||||||
CREATE DATABASE ${db} OWNER ${cfg.databases.${db}.owner};
|
|
||||||
'';
|
|
||||||
destroy = psqlSecret "printenv OBJECT" ''
|
|
||||||
DROP DATABASE @SECRET@;
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
services.locksmith.providers.patroni = lib.mkIf config.services.haproxy.enable {
|
|
||||||
secrets = lib.mapAttrs (user: userConfig: {
|
|
||||||
command = {
|
|
||||||
envFile = ''
|
|
||||||
echo "PGPASSWORD=$(cat /run/keys/locksmith-provider-patroni-${user})"
|
|
||||||
rm -f /run/keys/locksmith-provider-patroni-${user}
|
|
||||||
'';
|
|
||||||
pgpass = ''
|
|
||||||
echo "*:*:*:${user}:$(cat /run/keys/locksmith-provider-patroni-${user})"
|
|
||||||
rm -f /run/keys/locksmith-provider-patroni-${user}
|
|
||||||
'';
|
|
||||||
raw = ''
|
|
||||||
cat /run/keys/locksmith-provider-patroni-${user}
|
|
||||||
rm -f /run/keys/locksmith-provider-patroni-${user}
|
|
||||||
'';
|
|
||||||
}.${userConfig.locksmith.format};
|
|
||||||
checkUpdate = "test -e /run/keys/locksmith-provider-patroni-${user}";
|
|
||||||
inherit (userConfig.locksmith) nodes;
|
|
||||||
}) cfg.users;
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -1,12 +1,6 @@
|
||||||
{ config, ... }:
|
{ config, lib, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
|
||||||
./options.nix
|
|
||||||
./incandescence.nix
|
|
||||||
./simulacrum/test-data.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
links = {
|
links = {
|
||||||
patroni-pg-internal.ipv4 = "0.0.0.0";
|
patroni-pg-internal.ipv4 = "0.0.0.0";
|
||||||
patroni-api.ipv4 = "0.0.0.0";
|
patroni-api.ipv4 = "0.0.0.0";
|
||||||
|
@ -21,7 +15,6 @@
|
||||||
worker = [
|
worker = [
|
||||||
./worker.nix
|
./worker.nix
|
||||||
./metrics.nix
|
./metrics.nix
|
||||||
./create-databases.nix
|
|
||||||
];
|
];
|
||||||
haproxy = ./haproxy.nix;
|
haproxy = ./haproxy.nix;
|
||||||
};
|
};
|
||||||
|
@ -37,10 +30,5 @@
|
||||||
PATRONI_REWIND_PASSWORD = default;
|
PATRONI_REWIND_PASSWORD = default;
|
||||||
metricsCredentials.nodes = nodes.worker;
|
metricsCredentials.nodes = nodes.worker;
|
||||||
};
|
};
|
||||||
simulacrum = {
|
|
||||||
enable = true;
|
|
||||||
deps = [ "consul" "incandescence" "locksmith" ];
|
|
||||||
settings = ./simulacrum/test.nix;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
{ config, lib, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
incandescence.providers.patroni = {
|
|
||||||
objects = {
|
|
||||||
user = lib.attrNames config.patroni.users;
|
|
||||||
database = lib.attrNames config.patroni.databases;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
{ lib, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
inherit (lib) mkOption;
|
|
||||||
inherit (lib.types) attrsOf enum listOf submodule str;
|
|
||||||
in
|
|
||||||
|
|
||||||
{
|
|
||||||
options.patroni = {
|
|
||||||
databases = mkOption {
|
|
||||||
type = attrsOf (submodule ({ name, ... }: {
|
|
||||||
options = {
|
|
||||||
owner = mkOption {
|
|
||||||
type = str;
|
|
||||||
default = name;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}));
|
|
||||||
default = {};
|
|
||||||
};
|
|
||||||
users = mkOption {
|
|
||||||
type = attrsOf (submodule ({ ... }: {
|
|
||||||
options = {
|
|
||||||
locksmith = {
|
|
||||||
nodes = mkOption {
|
|
||||||
type = listOf str;
|
|
||||||
default = [];
|
|
||||||
};
|
|
||||||
format = mkOption {
|
|
||||||
type = enum [ "pgpass" "envFile" "raw" ];
|
|
||||||
default = "pgpass";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}));
|
|
||||||
default = {};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
{ config, lib, ... }:
|
|
||||||
{
|
|
||||||
patroni = lib.mkIf config.simulacrum {
|
|
||||||
databases = config.lib.forService "patroni" {
|
|
||||||
testdb.owner = "testuser";
|
|
||||||
};
|
|
||||||
users = config.lib.forService "patroni" {
|
|
||||||
testuser.locksmith = {
|
|
||||||
nodes = config.services.patroni.nodes.haproxy;
|
|
||||||
format = "pgpass";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
|
@ -1,91 +0,0 @@
|
||||||
{ cluster, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
clusterName = "poseidon";
|
|
||||||
link = cluster.config.links.patroni-pg-access;
|
|
||||||
in
|
|
||||||
{
|
|
||||||
defaults = { depot, pkgs, ... }: {
|
|
||||||
environment.systemPackages = [
|
|
||||||
pkgs.jq
|
|
||||||
depot.packages.postgresql
|
|
||||||
];
|
|
||||||
services.patroni.settings.postgresql.pg_hba = [
|
|
||||||
"host postgres postgres 0.0.0.0/0 trust"
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
|
|
||||||
testScript = ''
|
|
||||||
import json
|
|
||||||
nodeNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
|
|
||||||
clientNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
|
|
||||||
nodes = [ n for n in machines if n.name in nodeNames ]
|
|
||||||
clients = [ n for n in machines if n.name in clientNames ]
|
|
||||||
|
|
||||||
def booted(nodes):
|
|
||||||
return filter(lambda node: node.booted, nodes)
|
|
||||||
|
|
||||||
def wait_for_all_nodes_ready(expected_replicas=2):
|
|
||||||
booted_nodes = booted(nodes)
|
|
||||||
for node in booted_nodes:
|
|
||||||
node.wait_for_unit("patroni.service")
|
|
||||||
print(node.succeed("patronictl list ${clusterName}"))
|
|
||||||
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'length') == {expected_replicas + 1} ]")
|
|
||||||
node.wait_until_succeeds("[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
|
|
||||||
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
|
|
||||||
print(node.succeed("patronictl list ${clusterName}"))
|
|
||||||
for client in booted(clients):
|
|
||||||
client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")
|
|
||||||
|
|
||||||
def run_dummy_queries():
|
|
||||||
for client in booted(clients):
|
|
||||||
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
|
|
||||||
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
|
|
||||||
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
|
|
||||||
|
|
||||||
start_all()
|
|
||||||
|
|
||||||
with subtest("should bootstrap a new patroni cluster"):
|
|
||||||
wait_for_all_nodes_ready()
|
|
||||||
|
|
||||||
with subtest("should be able to insert and select"):
|
|
||||||
booted_clients = list(booted(clients))
|
|
||||||
booted_clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
|
|
||||||
for client in booted_clients:
|
|
||||||
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
|
|
||||||
|
|
||||||
with subtest("should restart after all nodes are crashed"):
|
|
||||||
for node in nodes:
|
|
||||||
node.crash()
|
|
||||||
for node in nodes:
|
|
||||||
node.start()
|
|
||||||
wait_for_all_nodes_ready()
|
|
||||||
|
|
||||||
with subtest("should be able to run queries while any one node is crashed"):
|
|
||||||
masterNodeName = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
|
|
||||||
masterNodeIndex = next((i for i, v in enumerate(nodes) if v.name == masterNodeName))
|
|
||||||
|
|
||||||
# Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
|
|
||||||
nodes.append(nodes.pop(masterNodeIndex))
|
|
||||||
|
|
||||||
for node in nodes:
|
|
||||||
node.crash()
|
|
||||||
wait_for_all_nodes_ready(1)
|
|
||||||
|
|
||||||
# Execute some queries while a node is down.
|
|
||||||
run_dummy_queries()
|
|
||||||
|
|
||||||
# Restart crashed node.
|
|
||||||
node.start()
|
|
||||||
wait_for_all_nodes_ready()
|
|
||||||
|
|
||||||
# Execute some queries with the node back up.
|
|
||||||
run_dummy_queries()
|
|
||||||
|
|
||||||
with subtest("should create databases and users via incandescence"):
|
|
||||||
for client in clients:
|
|
||||||
client.succeed(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d testdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
|
|
||||||
client.fail("PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d postgres --command='select * from dummy;'")
|
|
||||||
'';
|
|
||||||
}
|
|
|
@ -25,10 +25,6 @@ in
|
||||||
"d '${baseDir}' 0700 patroni patroni - -"
|
"d '${baseDir}' 0700 patroni patroni - -"
|
||||||
"d '${walDir}' 0700 patroni patroni - -"
|
"d '${walDir}' 0700 patroni patroni - -"
|
||||||
];
|
];
|
||||||
systemd.services.patroni = {
|
|
||||||
requires = [ "consul-ready.service" ];
|
|
||||||
after = [ "consul-ready.service" ];
|
|
||||||
};
|
|
||||||
services.patroni = {
|
services.patroni = {
|
||||||
enable = true;
|
enable = true;
|
||||||
name = hostName;
|
name = hostName;
|
||||||
|
@ -61,7 +57,6 @@ in
|
||||||
};
|
};
|
||||||
use_pg_rewind = true;
|
use_pg_rewind = true;
|
||||||
use_slots = true;
|
use_slots = true;
|
||||||
synchronous_mode = true;
|
|
||||||
authentication = {
|
authentication = {
|
||||||
replication.username = "patronirep";
|
replication.username = "patronirep";
|
||||||
rewind.username = "patronirew";
|
rewind.username = "patronirew";
|
||||||
|
@ -72,7 +67,6 @@ in
|
||||||
wal_level = "replica";
|
wal_level = "replica";
|
||||||
hot_standby_feedback = "on";
|
hot_standby_feedback = "on";
|
||||||
unix_socket_directories = "/tmp";
|
unix_socket_directories = "/tmp";
|
||||||
synchronous_commit = "on";
|
|
||||||
};
|
};
|
||||||
pg_hba = [
|
pg_hba = [
|
||||||
"host replication patronirep ${net} scram-sha-256"
|
"host replication patronirep ${net} scram-sha-256"
|
||||||
|
|
|
@ -35,6 +35,11 @@ in
|
||||||
inherit (self'.packages) keycloak;
|
inherit (self'.packages) keycloak;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
patroni = pkgs.callPackage ./patroni.nix {
|
||||||
|
inherit (self) nixosModules;
|
||||||
|
inherit (self'.packages) postgresql;
|
||||||
|
};
|
||||||
|
|
||||||
s3ql-upgrade = pkgs.callPackage ./s3ql-upgrade.nix {
|
s3ql-upgrade = pkgs.callPackage ./s3ql-upgrade.nix {
|
||||||
inherit (self'.packages) s3ql;
|
inherit (self'.packages) s3ql;
|
||||||
inherit (self) nixosModules;
|
inherit (self) nixosModules;
|
||||||
|
|
211
packages/checks/patroni.nix
Normal file
211
packages/checks/patroni.nix
Normal file
|
@ -0,0 +1,211 @@
|
||||||
|
{ nixosTest, nixosModules, postgresql }:
|
||||||
|
|
||||||
|
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
|
||||||
|
nixosTest (
|
||||||
|
let
|
||||||
|
nodesIps = [
|
||||||
|
"192.168.1.1"
|
||||||
|
"192.168.1.2"
|
||||||
|
"192.168.1.3"
|
||||||
|
];
|
||||||
|
|
||||||
|
createNode = index: { pkgs, ... }:
|
||||||
|
let
|
||||||
|
ip = builtins.elemAt nodesIps index; # since we already use IPs to identify servers
|
||||||
|
in
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
nixosModules.patroni
|
||||||
|
nixosModules.systemd-extras
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
|
||||||
|
{ address = ip; prefixLength = 16; }
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.firewall.allowedTCPPorts = [ 5432 8008 5010 ];
|
||||||
|
|
||||||
|
environment.systemPackages = [ pkgs.jq ];
|
||||||
|
|
||||||
|
services.patroni = {
|
||||||
|
|
||||||
|
enable = true;
|
||||||
|
|
||||||
|
postgresqlPackage = postgresql.withPackages (p: [ p.pg_safeupdate ]);
|
||||||
|
|
||||||
|
scope = "cluster1";
|
||||||
|
name = "node${toString(index + 1)}";
|
||||||
|
nodeIp = ip;
|
||||||
|
otherNodesIps = builtins.filter (h: h != ip) nodesIps;
|
||||||
|
softwareWatchdog = true;
|
||||||
|
|
||||||
|
settings = {
|
||||||
|
bootstrap = {
|
||||||
|
dcs = {
|
||||||
|
ttl = 30;
|
||||||
|
loop_wait = 10;
|
||||||
|
retry_timeout = 10;
|
||||||
|
maximum_lag_on_failover = 1048576;
|
||||||
|
};
|
||||||
|
initdb = [
|
||||||
|
{ encoding = "UTF8"; }
|
||||||
|
"data-checksums"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
postgresql = {
|
||||||
|
use_pg_rewind = true;
|
||||||
|
use_slots = true;
|
||||||
|
authentication = {
|
||||||
|
replication = {
|
||||||
|
username = "replicator";
|
||||||
|
};
|
||||||
|
superuser = {
|
||||||
|
username = "postgres";
|
||||||
|
};
|
||||||
|
rewind = {
|
||||||
|
username = "rewind";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
parameters = {
|
||||||
|
listen_addresses = "${ip}";
|
||||||
|
wal_level = "replica";
|
||||||
|
hot_standby_feedback = "on";
|
||||||
|
unix_socket_directories = "/tmp";
|
||||||
|
};
|
||||||
|
pg_hba = [
|
||||||
|
"host replication replicator 192.168.1.0/24 md5"
|
||||||
|
# Unsafe, do not use for anything other than tests
|
||||||
|
"host all all 0.0.0.0/0 trust"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
etcd3 = {
|
||||||
|
host = "192.168.1.4:2379";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
environmentFiles = {
|
||||||
|
PATRONI_REPLICATION_PASSWORD = pkgs.writeText "replication-password" "postgres";
|
||||||
|
PATRONI_SUPERUSER_PASSWORD = pkgs.writeText "superuser-password" "postgres";
|
||||||
|
PATRONI_REWIND_PASSWORD = pkgs.writeText "rewind-password" "postgres";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# We always want to restart so the tests never hang
|
||||||
|
systemd.services.patroni.serviceConfig.StartLimitIntervalSec = 0;
|
||||||
|
};
|
||||||
|
in
|
||||||
|
{
|
||||||
|
name = "patroni";
|
||||||
|
|
||||||
|
nodes = {
|
||||||
|
node1 = createNode 0;
|
||||||
|
node2 = createNode 1;
|
||||||
|
node3 = createNode 2;
|
||||||
|
|
||||||
|
etcd = { pkgs, ... }: {
|
||||||
|
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
|
||||||
|
{ address = "192.168.1.4"; prefixLength = 16; }
|
||||||
|
];
|
||||||
|
|
||||||
|
services.etcd = {
|
||||||
|
enable = true;
|
||||||
|
listenClientUrls = [ "http://192.168.1.4:2379" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall.allowedTCPPorts = [ 2379 ];
|
||||||
|
};
|
||||||
|
|
||||||
|
client = { pkgs, ... }: {
|
||||||
|
environment.systemPackages = [ postgresql ];
|
||||||
|
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
|
||||||
|
{ address = "192.168.2.1"; prefixLength = 16; }
|
||||||
|
];
|
||||||
|
|
||||||
|
services.haproxy = {
|
||||||
|
enable = true;
|
||||||
|
config = ''
|
||||||
|
global
|
||||||
|
maxconn 100
|
||||||
|
|
||||||
|
defaults
|
||||||
|
log global
|
||||||
|
mode tcp
|
||||||
|
retries 2
|
||||||
|
timeout client 30m
|
||||||
|
timeout connect 4s
|
||||||
|
timeout server 30m
|
||||||
|
timeout check 5s
|
||||||
|
|
||||||
|
listen cluster1
|
||||||
|
bind 127.0.0.1:5432
|
||||||
|
option httpchk
|
||||||
|
http-check expect status 200
|
||||||
|
default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
|
||||||
|
${builtins.concatStringsSep "\n" (map (ip: "server postgresql_${ip}_5432 ${ip}:5432 maxconn 100 check port 8008") nodesIps)}
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
testScript = ''
|
||||||
|
nodes = [node1, node2, node3]
|
||||||
|
|
||||||
|
def wait_for_all_nodes_ready(expected_replicas=2):
|
||||||
|
booted_nodes = filter(lambda node: node.booted, nodes)
|
||||||
|
for node in booted_nodes:
|
||||||
|
print(node.succeed("patronictl list cluster1"))
|
||||||
|
node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'length') == {expected_replicas + 1} ]")
|
||||||
|
node.wait_until_succeeds("[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
|
||||||
|
node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
|
||||||
|
print(node.succeed("patronictl list cluster1"))
|
||||||
|
client.wait_until_succeeds("psql -h 127.0.0.1 -U postgres --command='select 1;'")
|
||||||
|
|
||||||
|
def run_dummy_queries():
|
||||||
|
client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
|
||||||
|
client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
|
||||||
|
client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
|
||||||
|
|
||||||
|
start_all()
|
||||||
|
|
||||||
|
with subtest("should bootstrap a new patroni cluster"):
|
||||||
|
wait_for_all_nodes_ready()
|
||||||
|
|
||||||
|
with subtest("should be able to insert and select"):
|
||||||
|
client.succeed("psql -h 127.0.0.1 -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
|
||||||
|
client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
|
||||||
|
|
||||||
|
with subtest("should restart after all nodes are crashed"):
|
||||||
|
for node in nodes:
|
||||||
|
node.crash()
|
||||||
|
for node in nodes:
|
||||||
|
node.start()
|
||||||
|
wait_for_all_nodes_ready()
|
||||||
|
|
||||||
|
with subtest("should be able to run queries while any one node is crashed"):
|
||||||
|
masterNodeName = node1.succeed("patronictl list -f json cluster1 | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
|
||||||
|
masterNodeIndex = int(masterNodeName[len(masterNodeName)-1]) - 1
|
||||||
|
|
||||||
|
# Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
|
||||||
|
nodes.append(nodes.pop(masterNodeIndex))
|
||||||
|
|
||||||
|
for node in nodes:
|
||||||
|
node.crash()
|
||||||
|
wait_for_all_nodes_ready(1)
|
||||||
|
|
||||||
|
# Execute some queries while a node is down.
|
||||||
|
run_dummy_queries()
|
||||||
|
|
||||||
|
# Restart crashed node.
|
||||||
|
node.start()
|
||||||
|
wait_for_all_nodes_ready()
|
||||||
|
|
||||||
|
# Execute some queries with the node back up.
|
||||||
|
run_dummy_queries()
|
||||||
|
'';
|
||||||
|
})
|
Loading…
Add table
Reference in a new issue