diff --git a/packages/checks/default.nix b/packages/checks/default.nix
index 02b9853..9909e51 100644
--- a/packages/checks/default.nix
+++ b/packages/checks/default.nix
@@ -1,7 +1,7 @@
 { self, ... }:
 
 {
-  perSystem = { filters, pkgs, self', ... }: {
+  perSystem = { filters, pkgs, self', timeTravel', ... }: {
     checks = filters.doFilter filters.checks {
       keycloak = pkgs.callPackage ./keycloak-custom-jre.nix {
         jre = self'.packages.jre17_standard;
@@ -11,6 +11,11 @@
         inherit (self) nixosModules;
         inherit (self'.packages) postgresql;
       };
+      patroni-migration = pkgs.callPackage ./patroni-migration.nix {
+        previous = timeTravel' "486161b78e45e94a6f314b65bb05080605f0cd01";
+        inherit (self) nixosModules;
+        inherit (self'.packages) postgresql;
+      };
       searxng = pkgs.callPackage ./searxng.nix {
         inherit (self'.packages) searxng;
       };
diff --git a/packages/checks/patroni-migration.nix b/packages/checks/patroni-migration.nix
new file mode 100644
index 0000000..b075ca6
--- /dev/null
+++ b/packages/checks/patroni-migration.nix
@@ -0,0 +1,280 @@
+# NixOS VM test: migrate a three-node Patroni cluster from the PostgreSQL
+# package of a previous revision (`previous`) to the current `postgresql`,
+# while a client keeps writing through HAProxy.
+{ nixosTest, nixosModules, postgresql, previous }:
+
+nixosTest (
+  let
+    pgOld = previous.packages.postgresql;
+    pgNew = postgresql;
+
+    nodesIps = [
+      "192.168.1.1"
+      "192.168.1.2"
+      "192.168.1.3"
+    ];
+
+    # Build the NixOS configuration of cluster member `index` (0-based, into
+    # nodesIps) running Patroni with the given `postgresql` package.
+    createNode = index: postgresql: { pkgs, ... }:
+      let
+        ip = builtins.elemAt nodesIps index; # since we already use IPs to identify servers
+      in
+      {
+        imports = [
+          nixosModules.patroni
+          nixosModules.systemd-extras
+        ];
+
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
+          { address = ip; prefixLength = 16; }
+        ];
+
+        networking.firewall.allowedTCPPorts = [ 5432 8008 5010 ];
+
+        environment.systemPackages = [ pkgs.jq ];
+
+        systemd.tmpfiles.rules = [
+          "d /data 0700 patroni patroni - -"
+        ];
+        services.patroni = {
+
+          enable = true;
+
+          migrations = {
+            enable = true;
+          };
+
+          dataDir = "/data/patroni";
+          postgresqlDataDir = "/data/postgres";
+
+          postgresqlPackage = postgresql.withPackages (p: [ p.pg_safeupdate ]);
+
+          scope = "cluster1";
+          name = "node${toString(index + 1)}";
+          nodeIp = ip;
+          otherNodesIps = builtins.filter (h: h != ip) nodesIps;
+          softwareWatchdog = true;
+
+          settings = {
+            bootstrap = {
+              dcs = {
+                ttl = 30;
+                loop_wait = 10;
+                retry_timeout = 10;
+                maximum_lag_on_failover = 1048576;
+              };
+              initdb = [
+                { encoding = "UTF8"; }
+                "data-checksums"
+              ];
+            };
+
+            postgresql = {
+              use_pg_rewind = true;
+              use_slots = true;
+              authentication = {
+                replication = {
+                  username = "replicator";
+                };
+                superuser = {
+                  username = "postgres";
+                };
+                rewind = {
+                  username = "rewind";
+                };
+              };
+              parameters = {
+                listen_addresses = "${ip}";
+                wal_level = "replica";
+                hot_standby_feedback = "on";
+                unix_socket_directories = "/tmp";
+              };
+              pg_hba = [
+                "host replication replicator 192.168.1.0/24 md5"
+                # Unsafe, do not use for anything other than tests
+                "host all all 0.0.0.0/0 trust"
+              ];
+            };
+
+            consul = {
+              host = "192.168.1.4:8500";
+              register_service = true;
+            };
+          };
+
+          environmentFiles = {
+            PATRONI_REPLICATION_PASSWORD = pkgs.writeText "replication-password" "postgres";
+            PATRONI_SUPERUSER_PASSWORD = pkgs.writeText "superuser-password" "postgres";
+            PATRONI_REWIND_PASSWORD = pkgs.writeText "rewind-password" "postgres";
+          };
+        };
+
+        # We always want to restart so the tests never hang
+        # NOTE(review): systemd documents StartLimitIntervalSec= as a [Unit]
+        # setting; confirm it takes effect when set through serviceConfig.
+        systemd.services.patroni.serviceConfig.StartLimitIntervalSec = 0;
+      };
+  in
+  {
+    name = "patroni";
+
+    nodes = {
+      # Old and new nodes share IPs and Patroni names pairwise (nodeN/nodeNnew);
+      # the test script shuts an old node down before using its replacement.
+      node1 = createNode 0 pgOld;
+      node2 = createNode 1 pgOld;
+      node3 = createNode 2 pgOld;
+      node1new = createNode 0 pgNew;
+      node2new = createNode 1 pgNew;
+      node3new = createNode 2 pgNew;
+
+      consul = { pkgs, ... }: {
+
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
+          { address = "192.168.1.4"; prefixLength = 16; }
+        ];
+
+        services.consul = {
+          enable = true;
+          extraConfig = {
+            addresses.http = "192.168.1.4";
+            server = true;
+            bind_addr = "192.168.1.4";
+            bootstrap_expect = 1;
+          };
+        };
+
+        networking.firewall.allowedTCPPorts = [ 8500 ];
+      };
+
+      client = { pkgs, ... }: {
+        environment.systemPackages = [ postgresql ];
+
+        # Background writer: creates dummy2 with 1..10, then keeps appending
+        # consecutive integers until `select version()` reports a different
+        # server version than the one first seen.
+        systemd.services.db-writer = {
+          wantedBy = [ "multi-user.target" ];
+          after = [ "haproxy.service" ];
+          requires = [ "haproxy.service" ];
+          serviceConfig.Type = "oneshot";
+          script = ''
+            # Do not abort on failed commands; the loops below retry psql until it succeeds.
+            set +e
+            while ! ${pgNew}/bin/psql -h 127.0.0.1 -U postgres --command='create table dummy2 as select * from generate_series(1, 10) as val;'; do
+              sleep 2;
+            done
+            i=11
+            version="$(${pgNew}/bin/psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select version();')"
+            while sleep .5; do
+              newVersion=""
+              while [[ -z "$newVersion" ]]; do
+                newVersion="$(${pgNew}/bin/psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select version();')"
+                sleep .5
+              done
+              echo $newVersion
+
+              while ! ${pgNew}/bin/psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command="insert into dummy2 values($i);"; do
+                echo retrying write for value $i
+                sleep .5
+              done
+              echo wrote value $i
+              i=$((i+1))
+
+              if [[ "$newVersion" != "$version" ]]; then
+                echo new version detected, quitting
+                exit 0
+              fi
+            done
+          '';
+        };
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
+          { address = "192.168.2.1"; prefixLength = 16; }
+        ];
+
+        services.haproxy = {
+          enable = true;
+          config = ''
+            global
+                maxconn 100
+
+            defaults
+                log global
+                mode tcp
+                retries 2
+                timeout client 30m
+                timeout connect 4s
+                timeout server 30m
+                timeout check 5s
+
+            listen cluster1
+                bind 127.0.0.1:5432
+                option httpchk
+                http-check expect status 200
+                default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
+                ${builtins.concatStringsSep "\n" (map (ip: "server postgresql_${ip}_5432 ${ip}:5432 maxconn 100 check port 8008") nodesIps)}
+          '';
+        };
+      };
+    };
+
+
+
+    testScript = /*python*/ ''
+      nodes = [node1, node2, node3]
+      nodes_new = [node1new, node2new, node3new]
+      node_pairs = [
+          (1, node1, node1new),
+          (2, node2, node2new),
+          (3, node3, node3new)
+      ]
+
+      # Block until every booted node in `nodes` reports a cluster of
+      # expected_replicas + 1 members with exactly one running Leader and
+      # expected_replicas running Replicas, and the client can query via HAProxy.
+      def wait_for_all_nodes_ready(nodes=nodes, expected_replicas=2):
+          booted_nodes = filter(lambda node: node.booted, nodes)
+          for node in booted_nodes:
+              print(node.succeed("patronictl list cluster1"))
+              node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'length') == {expected_replicas + 1} ]")
+              node.wait_until_succeeds("[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
+              node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^running$\"))) | length') == {expected_replicas} ]")
+              print(node.succeed("patronictl list cluster1"))
+          client.wait_until_succeeds("psql -h 127.0.0.1 -U postgres --command='select 1;'")
+
+      # Insert, read back and delete one row in `dummy` through the client.
+      def run_dummy_queries():
+          client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
+          client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
+          client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
+
+      consul.start()
+      client.start()
+      for node in nodes:
+          node.start()
+
+      with subtest("should bootstrap a new patroni cluster"):
+          wait_for_all_nodes_ready()
+
+      with subtest("should be able to insert and select"):
+          client.succeed("psql -h 127.0.0.1 -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
+          client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
+
+      with subtest("should upgrade to a new major version"):
+          # Copy each old node's /data to its replacement through /tmp/shared, one node at a time.
+          for (i, old, new) in node_pairs:
+              old.succeed("systemctl stop patroni")
+              old.succeed(f"tar cf /tmp/shared/data{i}.tar /data")
+              old.shutdown()
+              new.succeed(f"tar xf /tmp/shared/data{i}.tar -C /")
+
+      with subtest("should be able to read and write after upgrade"):
+          wait_for_all_nodes_ready(nodes=nodes_new)
+          run_dummy_queries()
+
+      with subtest("should not have lost any data"):
+          # dummy2 holds 1..N when no write was lost, so count(distinct) == max.
+          client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy2;') -eq $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select max(val) from dummy2;')")
+    '';
+  })