Compare commits

..

52 commits

Author SHA1 Message Date
Max
00f233e8a5 cluster/services/frangiclave: funny 2024-08-12 03:04:14 +02:00
Max
e2fe73039c cluster/services/frangiclave: test in simulacrum WIP 2024-08-12 03:04:14 +02:00
Max
e4f09218d5 cluster/services/frangiclave: retry_join 2024-08-12 03:04:14 +02:00
Max
ebc9b88c8d cluster/services/frangiclave: some cluster stuff 2024-08-12 03:04:14 +02:00
Max
ea3414c427 cluster/services/frangiclave: init trivial WIP 2024-08-12 03:04:14 +02:00
Max
6eee030b7a cluster/services/storage: register existing keys and buckets in incandescence 2024-08-12 03:04:03 +02:00
Max
75cebf4ed6 cluster/services/incandescence: add base layout for ascensions 2024-08-12 03:04:03 +02:00
Max
bc3cd82731 cluster/services/consul: implement runConsul incantation 2024-08-12 03:04:03 +02:00
Max
9cdf964c6c cluster/services/forge: define db 2024-08-12 03:04:03 +02:00
Max
bb22fe0176 cluster/services/patroni: implement raw format for locksmith provider 2024-08-12 03:04:03 +02:00
Max
d1f2bc1227 cluster/services/storage: define snakeoil passphrase for heresy, ensure encryption 2024-08-12 03:04:03 +02:00
Max
a48ee00f3d cluster/services/ways: add simulacrum deps 2024-08-12 03:04:03 +02:00
Max
9ed3655ccf cluster/services/storage: use recursive simulacrum deps 2024-08-12 03:04:03 +02:00
Max
945698a3ea cluster/services/patroni: add simulacrum deps 2024-08-12 03:04:03 +02:00
Max
f75c7b8522 cluster/services/locksmith: add simulacrum deps 2024-08-12 03:04:03 +02:00
Max
b258bab23e cluster/services/incandescence: add simulacrum deps 2024-08-12 03:04:03 +02:00
Max
e2296eace7 cluster/services/chant: add simulacrum deps 2024-08-12 03:04:03 +02:00
Max
304ae6e53c cluster/simulacrum: recursive service deps 2024-08-12 03:04:03 +02:00
Max
f322208f66 cluster/services/acme-client: implement augment for external ACME services 2024-08-12 03:04:03 +02:00
Max
7c4615ecfb cluster/simulacrum: implement nowhere, fix networking 2024-08-12 03:04:03 +02:00
Max
ec38e10fa9 cluster/services/forge: use forService 2024-08-12 03:04:03 +02:00
Max
5d9ff62afe cluster/services/dns: use patroni incandescence 2024-08-12 03:04:03 +02:00
Max
6d78b69601 cluster/services/patroni: implement incandescence provider for databases and users 2024-08-12 03:04:03 +02:00
Max
7129d44078 cluster/services/locksmith: only run secret generation command once 2024-08-12 03:04:03 +02:00
Max
76d205d114 cluster/services/locksmith: support skipping secret updates 2024-08-12 03:04:03 +02:00
Max
c8c9a6fbce modules/external-storage: implement detectFs for s3c4 2024-08-12 03:04:03 +02:00
Max
a1cad2efcd cluster/services/storage: use locksmith secrets for external storage 2024-08-12 03:04:03 +02:00
Max
c7f4e59908 cluster/services/storage: adjust test 2024-08-12 03:04:03 +02:00
Max
baed1ce871 cluster/services/storage: use incandescence 2024-08-12 03:04:03 +02:00
Max
014c1f9cd2 cluster/services/incandescence: init 2024-08-12 03:04:03 +02:00
Max
34704c8f08 modules/external-storage: support locksmith secrets 2024-08-12 03:04:03 +02:00
Max
ccc2a47880 cluster/services/storage: implement s3ql key format 2024-08-12 03:04:03 +02:00
Max
05cd729e90 cluster/services/hercules-ci-multi-agent: use forService 2024-08-12 03:04:03 +02:00
Max
8d0a2f00cc cluster/services/monitoring: use forService 2024-08-12 03:04:03 +02:00
Max
ff26e1ebc1 checks/garage: drop 2024-08-12 03:04:03 +02:00
Max
b848084dd8 packages/catalog: expose simulacrum checks differently 2024-08-12 03:04:03 +02:00
Max
fe8ddd4094 cluster/simulacrum: expose checks 2024-08-12 03:04:03 +02:00
Max
030b680b33 cluster/services/forge: use forService 2024-08-12 03:04:03 +02:00
Max
b453b0bb21 cluster/services/attic: use forService 2024-08-12 03:04:03 +02:00
Max
b6e0390555 cluster/lib: implement config.lib.forService for better option filtering 2024-08-12 03:04:03 +02:00
Max
bbe3373c2e cluster/simulacrum: set testConfig 2024-08-12 03:04:03 +02:00
Max
0ed4870b65 cluster/lib: introduce testConfig 2024-08-12 03:04:03 +02:00
Max
8ec13f5c87 cluster/services/storage: test in simulacrum 2024-08-12 03:04:03 +02:00
Max
5d52f72940 cluster/services/consul: test in simulacrum 2024-08-12 03:04:03 +02:00
Max
1af67b80ed cluster/services/wireguard: make simulacrum compatible 2024-08-12 03:04:03 +02:00
Max
a810717843 cluster/catalog: support snakeoil secrets 2024-08-12 03:04:03 +02:00
Max
bd39fc5d07 cluster/simulacrum: init 2024-08-12 03:04:03 +02:00
Max
25c001c182 cluster/lib: implement simulacrum options 2024-08-12 03:04:03 +02:00
Max
d944dee3bc WIP ENABLE DEBUG MODE 2024-08-12 02:56:57 +02:00
Max
aac5163a8b cluster/lib: implement injectNixosConfigForServices to select individual services 2024-08-12 02:56:57 +02:00
Max
01c74f62cf checks: add fake external storage module 2024-08-12 02:56:57 +02:00
Max
0110a4a0c3 checks: add a bunch of snakeoil keys 2024-08-12 02:56:57 +02:00
61 changed files with 526 additions and 938 deletions

View file

@ -46,6 +46,7 @@ in
};
}) // (if secretConfig.shared then let
secretFile = "${svcName}-${secretName}.age";
snakeoilFile = "${svcName}-${secretName}-snakeoil.txt";
in {
editSecret = {
description = "Edit this secret";
@ -54,15 +55,31 @@ in
agenix -e '${secretFile}'
'';
};
} else lib.mapAttrs' (name: lib.nameValuePair "editSecretInstance-${name}") (lib.genAttrs secretConfig.nodes (node: let
secretFile = "${svcName}-${secretName}-${node}.age";
in {
description = "Edit this secret for '${node}'";
command = ''
${setupCommands secretFile [ node ]}
agenix -e '${secretFile}'
'';
})));
editSnakeoil = {
description = "Edit this secret's snakeoil";
command = ''
$EDITOR "$PRJ_ROOT/cluster/secrets"/'${snakeoilFile}'
'';
};
} else lib.mkMerge [
(lib.mapAttrs' (name: lib.nameValuePair "editSecretInstance-${name}") (lib.genAttrs secretConfig.nodes (node: let
secretFile = "${svcName}-${secretName}-${node}.age";
in {
description = "Edit this secret for '${node}'";
command = ''
${setupCommands secretFile [ node ]}
agenix -e '${secretFile}'
'';
})))
(lib.mapAttrs' (name: lib.nameValuePair "editSnakeoilInstance-${name}") (lib.genAttrs secretConfig.nodes (node: let
snakeoilFile = "${svcName}-${secretName}-${node}-snakeoil.txt";
in {
description = "Edit this secret's snakeoil for '${node}'";
command = ''
$EDITOR "$PRJ_ROOT/cluster/secrets"/'${snakeoilFile}'
'';
})))
]);
};
}) svcConfig.secrets))
lib.concatLists

View file

@ -3,14 +3,12 @@
{
hostLinks = lib.pipe config.services [
(lib.filterAttrs (_: svc: svc.meshLinks != {}))
(lib.mapAttrsToList (svcName: svc:
lib.mapAttrsToList (groupName: links:
lib.genAttrs svc.nodes.${groupName} (hostName: lib.mapAttrs (_: cfg: { ... }: {
imports = [ cfg.link ];
ipv4 = config.vars.mesh.${hostName}.meshIp;
}) links)
) svc.meshLinks
))
(lib.mapAttrsToList (svcName: svc: lib.mapAttrsToList (name: cfg: lib.genAttrs svc.nodes.${name} (hostName: {
${cfg.name} = { ... }: {
imports = [ cfg.link ];
ipv4 = config.vars.mesh.${hostName}.meshIp;
};
})) svc.meshLinks))
(map lib.mkMerge)
lib.mkMerge
];

View file

@ -38,8 +38,12 @@ in
};
meshLinks = mkOption {
description = "Create host links on the mesh network.";
type = types.attrsOf (types.attrsOf (types.submodule {
type = types.attrsOf (types.submodule ({ name, ... }: {
options = {
name = mkOption {
type = types.str;
default = "${serviceName}-${name}";
};
link = mkOption {
type = types.deferredModule;
default = {};

View file

@ -74,7 +74,7 @@ in
# Restart policy for the unit: restart on failure, backing off towards a
# 30 second ceiling.
serviceConfig = {
  Restart = "on-failure";
  RestartMaxDelaySec = 30;
  # The key must be spelled exactly "RestartSteps". The misspelled variant
  # "RestartStesp" is an unknown key that systemd ignores, and
  # RestartMaxDelaySec only takes effect when RestartSteps is set as well,
  # so the typo silently disables the back-off.
  RestartSteps = 5;
  RestartMode = "direct";
};
};

View file

@ -16,7 +16,10 @@
./nar-serve.nix
];
};
meshLinks.server.attic.link.protocol = "http";
meshLinks.server = {
name = "attic";
link.protocol = "http";
};
secrets = let
inherit (config.services.attic) nodes;
in {
@ -49,12 +52,10 @@
cache.target = serverAddrs;
};
ways = config.lib.forService "attic" {
cache-api = {
consulService = "atticd";
extras.extraConfig = ''
client_max_body_size 4G;
'';
};
ways.cache-api = {
consulService = "atticd";
extras.extraConfig = ''
client_max_body_size 4G;
'';
};
}

View file

@ -80,7 +80,6 @@ in
mode = if isMonolith then "manual" else "direct";
definition = {
name = "atticd";
id = "atticd-${config.services.atticd.mode}";
address = link.ipv4;
inherit (link) port;
checks = [

View file

@ -10,6 +10,13 @@ let
in
{
system.extraIncantations = {
runConsul = i: script: i.execShellWith [ config.services.consul.package ] ''
export CONSUL_HTTP_ADDR='${config.links.consulAgent.tuple}'
${script}
'';
};
links.consulAgent.protocol = "http";
services.consul = {

View file

@ -1,24 +1,19 @@
{ lib, ... }:
{
defaults.options.services.locksmith = lib.mkSinkUndeclaredOptions { };
testScript = ''
import json
start_all()
with subtest("should form cluster"):
nodes = [ n for n in machines if n != nowhere ]
for machine in nodes:
for machine in machines:
machine.succeed("systemctl start consul-ready.service")
for machine in nodes:
for machine in machines:
consulConfig = json.loads(machine.succeed("cat /etc/consul.json"))
addr = consulConfig["addresses"]["http"]
port = consulConfig["ports"]["http"]
setEnv = f"CONSUL_HTTP_ADDR={addr}:{port}"
memberList = machine.succeed(f"{setEnv} consul members --status=alive")
for machine2 in nodes:
for machine2 in machines:
assert machine2.name in memberList
'';
}

View file

@ -0,0 +1,16 @@
age-encryption.org/v1
-> ssh-ed25519 NO562A YndVtONpmfFXYB1ASnPHsfczl1UbgZ2vccIrX2pEgx0
VzH2UD583L6wBLMCo6faIGyHR4+zXXOUTgQduEiFOxI
-> ssh-ed25519 5/zT0w +67r5S6PSFEgnrTu3eZpOd3eemZUdDOE+kjUw6GDgUM
jPzlW7hePFgsABUjryePu5yergQ2Qjczmmoxuo6CK+U
-> ssh-ed25519 TCgorQ DGJPjJYpeibxM+8OwofUCdttIT2OdNbvQ66wpWQM8XU
JCNQ3bT21j2ZsxbzA6FieKIui6lsvk1p0nvNOT7YtFo
-> ssh-ed25519 d3WGuA hIl5yluwf1f0DP5ZW1MalGPCj4XFYOu2sofwJSQZ6RE
BSHoe4cdRJlPrkc+taUIaIIUknexlGttzz2d9I3jtmk
-> ssh-ed25519 YIaSKQ EbqXS/XFQHSXCbzDJmg4gGUxP9TX3+vOxWtNQDJ8ih4
hNaWzoFG2iVef4Gm30LilGXYNsVkhmVt9dOvBo02mbM
-> V]i@xRtJ-grease
NEPxMUZa76GclWOasWptt6QS7frMclp9o+kD4KCLJB7ucFOYK7xxWfAEMkjtadfP
m0bbgbw7Jcs9/lA8VNAG2D5jTBayGgpkBQZ4
--- ViqZD8mJEKIMCZ5Q+wRQWR2FX/LMEfUwoumUtHlYabQ
KAÉû¹ÝgZü<šë*DfV6·=äG»+eœ`ºpª±ï÷­<1E>º[Û‘Û û¸¢ºÐý-H1<1B>»Ã›Íí[fV.¾¢HÁ"OhÐñŒ½j•ùö8ïßß$‰;Û‘&5<>äxw§/mŒë<C592>Öß^7îf5ÔµyÏŽÓûC´6”¹U•æýi-R=/_R<5F><52>„·==æà½1˜'Ò qÞ·ŒvÜcwø

View file

@ -35,13 +35,10 @@ in
];
before = [ "acme-securedns.${domain}.service" ];
wants = [ "acme-finished-securedns.${domain}.target" ];
serviceConfig = {
LoadCredential = [
"dot-cert.pem:${dot.directory}/fullchain.pem"
"dot-key.pem:${dot.directory}/key.pem"
];
ExecReload = lib.mkForce [];
};
serviceConfig.LoadCredential = [
"dot-cert.pem:${dot.directory}/fullchain.pem"
"dot-key.pem:${dot.directory}/key.pem"
];
};
security.acme.certs."securedns.${domain}" = {

View file

@ -56,11 +56,6 @@ in
coredns = ./coredns.nix;
client = ./client.nix;
};
simulacrum = {
enable = true;
deps = [ "consul" "acme-client" "patroni" ];
settings = ./test.nix;
};
};
patroni = {

View file

@ -1,35 +0,0 @@
# Test settings for the dns cluster service: checks name resolution for the
# cluster's own domain and the certificate served on the DoT endpoint.
{ cluster, ... }:
let
# Domain name taken from the depot library metadata exposed through the
# cluster's specialArgs.
inherit (cluster._module.specialArgs.depot.lib.meta) domain;
in
{
# Extra test machine named "nowhere"; the test script runs its kdig and
# openssl commands on this node, hence the two packages below.
nodes.nowhere = { pkgs, ... }: {
passthru = cluster;
environment.systemPackages = [
pkgs.knot-dns
pkgs.openssl
];
};
# The script selects machines by the names listed in the dns service's
# "authoritative" and "coredns" node groups, then:
#  1. waits for coredns.service on the authoritative nodes and expects
#     securedns.<domain> to resolve from "nowhere", while example.com must
#     return no answers;
#  2. waits for the ACME "finished" target on the coredns nodes and verifies
#     the certificate on port 853 with openssl s_client.
testScript = ''
import json
nodeNames = json.loads('${builtins.toJSON cluster.config.services.dns.nodes.authoritative}')
dotNames = json.loads('${builtins.toJSON cluster.config.services.dns.nodes.coredns}')
nodes = [ n for n in machines if n.name in nodeNames ]
dotServers = [ n for n in machines if n.name in dotNames ]
start_all()
with subtest("should allow external name resolution for own domain"):
for node in nodes:
node.wait_for_unit("coredns.service")
nowhere.wait_until_succeeds("[[ $(kdig +short securedns.${domain} | wc -l) -ne 0 ]]", timeout=60)
nowhere.fail("[[ $(kdig +short example.com | wc -l) -ne 0 ]]")
with subtest("should have valid certificate on DoT endpoint"):
for node in dotServers:
node.wait_for_unit("acme-finished-securedns.${domain}.target")
nowhere.wait_until_succeeds("openssl </dev/null s_client -connect securedns.${domain}:853 -verify_return_error -strict -verify_hostname securedns.${domain}", timeout=60)
'';
}

View file

@ -4,7 +4,10 @@
services.forge = {
nodes.server = [ "VEGAS" ];
nixos.server = ./server.nix;
meshLinks.server.forge.link.protocol = "http";
meshLinks.server = {
name = "forge";
link.protocol = "http";
};
secrets = with config.services.forge.nodes; {
oidcSecret = {
nodes = server;
@ -13,10 +16,10 @@
};
};
ways = let
ways.forge = let
host = builtins.head config.services.forge.nodes.server;
in config.lib.forService "forge" {
forge.target = config.hostLinks.${host}.forge.url;
target = config.hostLinks.${host}.forge.url;
};
patroni = config.lib.forService "forge" {

View file

@ -0,0 +1,27 @@
# Cluster service definition for frangiclave: node groups, mesh links,
# per-group NixOS modules and the simulacrum test wiring.
{ config, ... }:

{
  services.frangiclave = {
    # Node groups. The "cluster" group reuses the final value of the
    # "server" group instead of repeating the host list.
    nodes.server = [ "VEGAS" "grail" "prophet" ];
    nodes.cluster = config.services.frangiclave.nodes.server;
    nodes.agent = [ ]; # all nodes, for vault-agent, secret templates, etc.

    # One mesh link per group that has one; both use the "http" protocol.
    meshLinks.server.link.protocol = "http";
    meshLinks.cluster.link.protocol = "http";

    # Only the "server" group contributes a NixOS module so far.
    nixos.server = [ ./server.nix ];
    nixos.cluster = [ ];
    nixos.agent = [ ];

    # Simulacrum test: enabled, with wireguard and consul as dependencies.
    simulacrum.enable = true;
    simulacrum.deps = [ "wireguard" "consul" ];
    simulacrum.settings = ./test.nix;
  };
}

View file

@ -0,0 +1,34 @@
# NixOS module for frangiclave server nodes: enables services.vault with the
# openbao package from depot, using raft storage.
{ cluster, config, depot, lib, ... }:
let
# This host's own mesh links, looked up by hostname in the cluster's hostLinks.
apiLink = cluster.config.hostLinks.${config.networking.hostName}.frangiclave-server;
clusterLink = cluster.config.hostLinks.${config.networking.hostName}.frangiclave-cluster;
in
{
services.vault = {
enable = true;
package = depot.packages.openbao;
# Listen on the API link; api_addr and cluster_addr below are set to the
# URLs of the API and cluster links respectively.
address = apiLink.tuple;
extraConfig = /*hcl*/ ''
api_addr = "${apiLink.url}"
cluster_addr = "${clusterLink.url}"
'';
storageBackend = "raft";
# node_id is "x" followed by the sha256 hash of "frangiclave-node-<hostname>".
# One retry_join stanza is emitted per node returned by
# `otherNodes.server <hostname>` (presumably the server nodes other than this
# host; confirm against the otherNodes definition), pointing at that node's
# frangiclave-server link URL.
storageConfig = /*hcl*/ ''
node_id = "x${builtins.hashString "sha256" "frangiclave-node-${config.networking.hostName}"}"
${
lib.pipe (cluster.config.services.frangiclave.otherNodes.server config.networking.hostName) [
(map (node: cluster.config.hostLinks.${node}.frangiclave-server))
(map (link: /*hcl*/ ''
retry_join {
leader_api_addr = "${link.url}"
}
''))
(lib.concatStringsSep "\n")
]
}
'';
};
}

View file

@ -0,0 +1,12 @@
# Simulacrum test settings for frangiclave.
{ lib, ... }:

{
  # Defaults for interactive test machines: on hosts where services.vault is
  # enabled, export VAULT_ADDR (this host's frangiclave-server link URL) and
  # install the configured vault package.
  interactive.defaults = { cluster, config, ... }:
    let
      vault = config.services.vault;
      # Only evaluated when the mkIf condition below holds.
      serverLink = cluster.config.hostLinks.${config.networking.hostName}.frangiclave-server;
    in
    {
      config = lib.mkIf vault.enable {
        environment = {
          variables.VAULT_ADDR = serverLink.url;
          systemPackages = [ vault.package ];
        };
      };
    };

  # NOTE(review): this script fails unconditionally; it looks like a
  # placeholder until real test steps exist.
  testScript = "assert False";
}

View file

@ -33,9 +33,6 @@ in
ldapbindaddress = "${ldapLink.ipv4}:${ldapLink.portStr}";
origin = frontendLink.url;
inherit domain;
online_backup = {
versions = 7;
};
};
};

View file

@ -3,7 +3,6 @@
{
imports = [
./options.nix
./simulacrum/test-data.nix
];
services.incandescence = {
@ -16,10 +15,6 @@
./provider-options.nix
];
};
simulacrum = {
enable = true;
deps = [ "consul" "locksmith" ];
settings = ./simulacrum/test.nix;
};
simulacrum.deps = [ "consul" ];
};
}

View file

@ -1,7 +1,7 @@
{ cluster, config, lib, ... }:
let
inherit (lib) concatStringsSep escapeShellArg flatten filter filterAttrs length mapAttrs mapAttrs' mapAttrsToList mkIf mkMerge optionalString pipe stringToCharacters;
inherit (lib) concatStringsSep escapeShellArg flatten filter filterAttrs length mapAttrs mapAttrs' mapAttrsToList mkIf mkMerge pipe stringToCharacters;
cfg = config.services.incandescence;
clusterCfg = cluster.config.incandescence;
@ -39,9 +39,6 @@ in
fi
''))
(concatStringsSep "\n")
(script: if script == "" then ''
echo "Nothing to create"
'' else script)
];
};
"ignite-${provider}-${formula}-change" = mkIf (formulaConfig.change != null) {
@ -61,16 +58,14 @@ in
) || echo "Change failed: ${object}"
''))
(concatStringsSep "\n")
(script: if script == "" then ''
echo "Nothing to change"
'' else script)
];
};
"ignite-${provider}-${formula}-destroy" = {
description = "Ignite Destruction: ${provider} - ${formula}";
wantedBy = [ "incandescence-${provider}.target" ] ++ map (dep: "ignite-${provider}-${dep}-destroy.service") formulaConfig.deps;
before = [ "incandescence-${provider}.target" ] ++ map (dep: "ignite-${provider}-${dep}-destroy.service") formulaConfig.deps;
inherit (providerConfig) wants after;
wants = providerConfig.wants ++ [ "ignite-${provider}-${formula}-change.service" ];
after = providerConfig.after ++ [ "ignite-${provider}-${formula}-change.service" ];
serviceConfig.Type = "oneshot";
distributed.enable = true;
path = [ config.services.consul.package ] ++ providerConfig.packages;
@ -82,15 +77,13 @@ in
(builtins.add 2)
toString
];
needsFilter = clusterCfg.providers.${provider}.objects.${formula} != [];
keyFilter = pipe clusterCfg.providers.${provider}.objects.${formula} [
(map (x: escapeShellArg "^${x}$"))
(concatStringsSep " \\\n -e ")
];
destroyAfterDays = toString formulaConfig.destroyAfterDays;
grep = optionalString needsFilter "grep -v -e ${keyFilter} |";
in ''
consul kv get --keys ${kvRoot}/ | cut -d/ -f${fieldNum} | ${grep} while read object; do
consul kv get --keys ${kvRoot}/ | cut -d/ -f${fieldNum} | grep -v -e ${keyFilter} | while read object; do
if consul kv get ${kvRoot}/$object/alive >/dev/null; then
destroyOn="$(consul kv get ${kvRoot}/$object/destroyOn || true)"
if [[ -z "$destroyOn" && "${destroyAfterDays}" -ne 0 ]]; then

View file

@ -1,8 +0,0 @@
# Test fixture for incandescence: only when config.simulacrum is set, declares
# a "test" provider whose "example" formula has the objects example1 and
# example2.
{ config, lib, ... }:
{
incandescence = lib.mkIf config.simulacrum {
# Wrapped in config.lib.forService "incandescence".
# NOTE(review): presumably this limits the definition to runs that include
# the incandescence service; confirm against the forService implementation.
providers = config.lib.forService "incandescence" {
test.objects.example = [ "example1" "example2" ];
};
};
}

View file

@ -1,47 +0,0 @@
# Test settings for incandescence: exercises object creation and destruction
# through a "test" provider that stores its objects in the Consul KV store.
{ cluster, lib, ... }:
let
# Only the first two nodes of the incandescence "provider" group take part.
providers = lib.take 2 cluster.config.services.incandescence.nodes.provider;
in
{
# Every participating node gets the same "test" provider definition.
nodes = lib.genAttrs providers (lib.const {
services.incandescence.providers.test = {
wantedBy = [ "multi-user.target" ];
partOf = [ ];
# "example" formula: create writes testData/<object> with the object name
# as its value; destroy deletes testData/$OBJECT.
formulae.example = {
create = x: "consul kv put testData/${x} ${x}";
destroy = "consul kv delete testData/$OBJECT";
};
};
});
# The script reads the Consul HTTP address and port from /etc/consul.json on
# the first node, then:
#  1. waits for incandescence-test.target on every node and expects
#     testData/example1 and testData/example2 to hold their own names;
#  2. writes testData/example3 plus its "alive" marker by hand, runs
#     `systemctl isolate default` on every node, waits for the target again
#     and expects testData/example3 to be gone.
testScript = ''
import json
nodeNames = json.loads('${builtins.toJSON providers}')
nodes = [ n for n in machines if n.name in nodeNames ]
start_all()
consulConfig = json.loads(nodes[0].succeed("cat /etc/consul.json"))
addr = consulConfig["addresses"]["http"]
port = consulConfig["ports"]["http"]
setEnv = f"CONSUL_HTTP_ADDR={addr}:{port}"
with subtest("should create objects"):
for node in nodes:
node.wait_for_unit("incandescence-test.target")
nodes[0].succeed(f"[[ $({setEnv} consul kv get testData/example1) == example1 ]]")
nodes[0].succeed(f"[[ $({setEnv} consul kv get testData/example2) == example2 ]]")
with subtest("should destroy objects"):
nodes[0].succeed(f"{setEnv} consul kv put testData/example3 example3")
nodes[0].succeed(f"{setEnv} consul kv put services/incandescence/providers/test/formulae/example/example3/alive true")
nodes[1].succeed(f"{setEnv} consul kv get testData/example3")
for node in nodes:
node.systemctl("isolate default")
for node in nodes:
node.wait_for_unit("incandescence-test.target")
nodes[0].fail(f"{setEnv} consul kv get testData/example3")
'';
}

View file

@ -29,7 +29,10 @@
io-tweaks = [ "VEGAS" ];
remote-api = [ "VEGAS" ];
};
meshLinks.gateway.ipfsGateway.link.protocol = "http";
meshLinks.gateway = {
name = "ipfsGateway";
link.protocol = "http";
};
nixos = {
node = [
./node.nix

View file

@ -9,7 +9,7 @@ in
environment = {
OTEL_TRACES_EXPORTER = "otlp";
OTEL_EXPORTER_OTLP_PROTOCOL = "grpc";
OTEL_EXPORTER_OTLP_ENDPOINT = "${cluster.config.ways.ingest-traces-otlp.url}:443";
OTEL_EXPORTER_OTLP_ENDPOINT = cluster.config.links.tempo-otlp-grpc.url;
OTEL_TRACES_SAMPLER = "parentbased_traceidratio";
OTEL_TRACES_SAMPLER_ARG = "0.50";
};

View file

@ -26,7 +26,7 @@ in {
name = "logging";
positions.filename = "\${STATE_DIRECTORY:/tmp}/logging-positions.yaml";
clients = singleton {
url = "${cluster.config.ways.ingest-logs.url}/loki/api/v1/push";
url = "${cluster.config.ways.monitoring-logs.url}/loki/api/v1/push";
};
scrape_configs = singleton {
job_name = "journal";

View file

@ -18,6 +18,26 @@ in
protocol = "http";
ipv4 = meshIpFor "server";
};
tempo = {
protocol = "http";
ipv4 = meshIpFor "server";
};
tempo-grpc = {
protocol = "http";
ipv4 = "127.0.0.1";
};
tempo-otlp-http = {
protocol = "http";
ipv4 = meshIpFor "server";
};
tempo-otlp-grpc = {
protocol = "http";
ipv4 = meshIpFor "server";
};
tempo-zipkin-http = {
protocol = "http";
ipv4 = meshIpFor "server";
};
};
hostLinks = lib.genAttrs config.services.monitoring.nodes.grafana (name: {
grafana = {
@ -31,7 +51,6 @@ in
blackbox = [ "checkmate" "grail" "prophet" ];
grafana = [ "VEGAS" "prophet" ];
logging = [ "VEGAS" "grail" ];
tracing = [ "VEGAS" "grail" ];
server = [ "VEGAS" ];
};
nixos = {
@ -42,19 +61,14 @@ in
./provisioning/dashboards.nix
];
logging = ./logging.nix;
tracing = ./tracing.nix;
server = [
./server.nix
./tracing.nix
];
};
meshLinks = {
logging.loki.link.protocol = "http";
tracing = {
tempo.link.protocol = "http";
tempo-otlp-http.link.protocol = "http";
tempo-otlp-grpc.link.protocol = "grpc";
tempo-zipkin-http.link.protocol = "http";
};
meshLinks.logging = {
name = "loki";
link.protocol = "http";
};
};
@ -68,51 +82,29 @@ in
nodes = config.services.monitoring.nodes.logging;
format = "envFile";
};
tempo-ingest.locksmith = {
nodes = config.services.monitoring.nodes.tracing;
format = "envFile";
};
tempo-query.locksmith = {
nodes = config.services.monitoring.nodes.tracing;
format = "envFile";
};
tempo = { };
};
buckets = {
loki-chunks.allow = {
loki-ingest = [ "read" "write" ];
loki-query = [ "read" ];
};
tempo-chunks.allow = {
tempo-ingest = [ "read" "write" ];
tempo-query = [ "read" ];
};
tempo-chunks.allow.tempo = [ "read" "write" ];
};
};
ways = let
query = consulService: {
inherit consulService;
internal = true;
extras.extraConfig = ''
proxy_read_timeout 3600s;
'';
ways = {
monitoring = {
consulService = "grafana";
extras.locations."/".proxyWebsockets = true;
};
ingest = consulService: {
inherit consulService;
monitoring-logs = {
internal = true;
consulService = "loki";
extras.extraConfig = ''
client_max_body_size 4G;
proxy_read_timeout 3600s;
'';
};
in config.lib.forService "monitoring" {
monitoring = {
consulService = "grafana";
extras.locations."/".proxyWebsockets = true;
};
monitoring-logs = query "loki";
monitoring-traces = query "tempo";
ingest-logs = ingest "loki";
ingest-traces-otlp = ingest "tempo-ingest-otlp-grpc" // { grpc = true; };
};
}

View file

@ -73,16 +73,6 @@ in
inherit (cluster.config.ways.monitoring-logs) url;
type = "loki";
}
{
name = "Tempo";
uid = "P214B5B846CF3925F";
inherit (cluster.config.ways.monitoring-traces) url;
type = "tempo";
jsonData = {
serviceMap.datasourceUid = "PBFA97CFB590B2093";
nodeGraph.enabled = true;
};
}
];
};
};

View file

@ -19,23 +19,10 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1210,
"id": 16,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 7,
"panels": [],
"title": "Replication",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
@ -71,7 +58,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 1
"y": 0
},
"id": 2,
"options": {
@ -79,7 +66,6 @@
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
@ -87,11 +73,9 @@
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
"textMode": "auto"
},
"pluginVersion": "11.1.3",
"pluginVersion": "9.5.1",
"targets": [
{
"datasource": {
@ -154,7 +138,7 @@
"h": 8,
"w": 4,
"x": 12,
"y": 1
"y": 0
},
"id": 3,
"options": {
@ -162,17 +146,14 @@
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [],
"fields": "/^instance$/",
"values": true
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
"textMode": "auto"
},
"pluginVersion": "11.1.3",
"pluginVersion": "9.5.1",
"targets": [
{
"datasource": {
@ -217,7 +198,7 @@
"h": 8,
"w": 8,
"x": 16,
"y": 1
"y": 0
},
"id": 5,
"options": {
@ -273,7 +254,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -287,7 +267,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineStyle": {
"fill": "solid"
@ -325,7 +304,7 @@
"h": 13,
"w": 12,
"x": 0,
"y": 9
"y": 8
},
"id": 1,
"options": {
@ -369,7 +348,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -383,7 +361,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
@ -421,7 +398,7 @@
"h": 13,
"w": 12,
"x": 12,
"y": 9
"y": 8
},
"id": 4,
"options": {
@ -451,326 +428,24 @@
],
"title": "Activity",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 22
},
"id": 8,
"panels": [],
"title": "Storage",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 8,
"x": 0,
"y": 23
},
"id": 6,
"options": {
"displayLabels": [
"percent"
],
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Value",
"sortDesc": true,
"values": [
"percent",
"value"
]
},
"pieType": "donut",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.1.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "max by(datname) (sum by(datname, instance) (pg_database_size_bytes))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A",
"useBackend": false
}
],
"title": "Database Size",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 5,
"x": 8,
"y": 23
},
"id": 9,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.1.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "pg_wal_size_bytes",
"format": "time_series",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "WAL Size",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds",
"seriesBy": "last"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 3,
"fieldMinMax": false,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 11,
"x": 13,
"y": 23
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "max(sum by(instance) (pg_database_size_bytes))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Total DB Size",
"type": "timeseries"
}
],
"refresh": "5m",
"schemaVersion": 39,
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "PostgreSQL HA",
"uid": "dc7545ef-3180-4a5e-a289-1e64571ebb87",
"version": 2,
"version": 7,
"weekStart": ""
},
"folderId": 0,

View file

@ -0,0 +1,16 @@
age-encryption.org/v1
-> ssh-ed25519 NO562A KhCGp7PAMGrEdzRxBrsW4tRk30JwpI+4lPzrRCUhSw4
8s7WqA5c3zS1euN5R+jfFNBdvr8OQW8P4NFeqtNsIKo
-> ssh-ed25519 5/zT0w 79hJQ2H76EZTW7YcQFCtKaS5Kbssx4Z8dPFjIVzRgFk
A1fDJbUnyIRy+kWa3PhJNj/SdRPlcEy6FYsAfnuZ2AQ
-> ssh-ed25519 d3WGuA aylkdL1KliM1NfrYDGlG8X6YjXvVUCU4sV90I+a840U
6sXdqIPjtoNSylZRh1DCghHOwDo+fC7WB4QWQoWmG48
-> //gd+2-grease baUWA$3 z-qs3W O/2.1W
Sfq3+rkMJhpUTTmcos5TaaUtX2Ip9pciHAZLiWPix+C9N7ccac/1W5RNedMJCLsq
MQ+xKzexf8+hgNVhKOksvbKBBROXqk1bUOKk8w3OgFPmmByzmCBUwkdkeu5DFTYR
rg
--- kUl1uIPRkM5y7C68kdN22pMKXP7gazyha4PE+ap0Jqw
w>×Àè¥
<15>CÈ,\‰ßœI¯ˆúHxG@^Çá“På ÃþÙÏlw6µŽ{þ’rbé5æ†T>Êñ
ÚWܤX4Kp(ß?9ˆß­^^oP3f </v3N$ê¤sÓbŽ¾> O™÷œ+òN0άïµàDtêŽ5Vº#è ¶³ îŸ#y|@ŒGzSi»­ô*·HùüŽ]
ꎀ5

View file

@ -1,16 +1,14 @@
{ cluster, config, pkgs, ... }:
let
inherit (cluster.config.links) prometheus-ingest;
inherit (config.links) tempo-grpc;
links = cluster.config.hostLinks.${config.networking.hostName};
inherit (cluster.config) links;
dataDir = "/srv/storage/private/tempo";
tempoConfig = {
server = {
http_listen_address = links.tempo.ipv4;
http_listen_port = links.tempo.port;
grpc_listen_address = tempo-grpc.ipv4;
grpc_listen_port = tempo-grpc.port;
grpc_listen_address = links.tempo-grpc.ipv4;
grpc_listen_port = links.tempo-grpc.port;
};
distributor.receivers = {
otlp = {
@ -21,7 +19,7 @@ let
};
zipkin.endpoint = links.tempo-zipkin-http.tuple;
};
querier.frontend_worker.frontend_address = tempo-grpc.tuple;
querier.frontend_worker.frontend_address = links.tempo-grpc.tuple;
ingester = {
trace_idle_period = "30s";
max_block_bytes = 1000000;
@ -58,7 +56,7 @@ let
path = "${dataDir}/generator/wal";
remote_write = [
{
url = "${prometheus-ingest.url}/api/v1/write";
url = "${links.prometheus-ingest.url}/api/v1/write";
send_exemplars = true;
}
];
@ -70,11 +68,7 @@ let
];
};
in {
links.tempo-grpc.protocol = "http";
services.locksmith.waitForSecrets.tempo = [
"garage-tempo-ingest"
];
age.secrets.tempoSecrets.file = ./secrets/tempo-secrets.age;
users.users.tempo = {
isSystemUser = true;
@ -87,53 +81,24 @@ in {
systemd.services.tempo = {
wantedBy = [ "multi-user.target" ];
distributed = {
enable = true;
registerServices = [
"tempo"
"tempo-ingest-otlp-grpc"
];
};
serviceConfig = {
User = "tempo";
Group = "tempo";
ExecStart = "${pkgs.tempo}/bin/tempo -config.file=${pkgs.writeText "tempo.yaml" (builtins.toJSON tempoConfig)}";
PrivateTmp = true;
EnvironmentFile = "/run/locksmith/garage-tempo-ingest";
EnvironmentFile = config.age.secrets.tempoSecrets.path;
};
};
consul.services = {
tempo = {
mode = "manual";
definition = {
name = "tempo";
address = links.tempo.ipv4;
inherit (links.tempo) port;
checks = [
{
name = "Tempo";
id = "service:tempo:backend";
interval = "5s";
http = "${links.tempo.url}/ready";
}
];
services.grafana.provision.datasources.settings.datasources = [
{
name = "Tempo";
uid = "P214B5B846CF3925F";
inherit (links.tempo) url;
type = "tempo";
jsonData = {
serviceMap.datasourceUid = "PBFA97CFB590B2093"; # prometheus
nodeGraph.enabled = true;
};
};
tempo-ingest-otlp-grpc = {
mode = "manual";
definition = {
name = "tempo-ingest-otlp-grpc";
address = links.tempo-otlp-grpc.ipv4;
inherit (links.tempo-otlp-grpc) port;
checks = [
{
name = "Tempo Service Status";
id = "service:tempo-ingest-otlp-grpc:tempo";
alias_service = "tempo";
}
];
};
};
};
}
];
}

View file

@ -49,9 +49,7 @@ in
user = {
destroyAfterDays = 0;
create = user: psqlSecret "${genPassword} ${user}" ''
SELECT 'CREATE USER ${user}'
WHERE NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '${user}')\gexec
ALTER USER ${user} PASSWORD '@SECRET@';
CREATE USER ${user} PASSWORD '@SECRET@';
'';
destroy = psqlSecret "printenv OBJECT" ''
DROP USER @SECRET@;
@ -61,11 +59,7 @@ in
destroyAfterDays = 30;
deps = [ "user" ];
create = db: psql ''
SELECT 'CREATE DATABASE ${db} OWNER ${cfg.databases.${db}.owner}'
WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '${db}')\gexec
'';
change = db: psql ''
ALTER DATABASE ${db} OWNER TO ${cfg.databases.${db}.owner};
CREATE DATABASE ${db} OWNER ${cfg.databases.${db}.owner};
'';
destroy = psqlSecret "printenv OBJECT" ''
DROP DATABASE @SECRET@;

View file

@ -4,7 +4,6 @@
imports = [
./options.nix
./incandescence.nix
./simulacrum/test-data.nix
];
links = {
@ -37,10 +36,6 @@
PATRONI_REWIND_PASSWORD = default;
metricsCredentials.nodes = nodes.worker;
};
simulacrum = {
enable = true;
deps = [ "consul" "incandescence" "locksmith" ];
settings = ./simulacrum/test.nix;
};
simulacrum.deps = [ "consul" "incandescence" "locksmith" ];
};
}

View file

@ -16,7 +16,6 @@ in
};
};
}));
default = {};
};
users = mkOption {
type = attrsOf (submodule ({ ... }: {
@ -33,7 +32,6 @@ in
};
};
}));
default = {};
};
};
}

View file

@ -1,19 +0,0 @@
# Simulacrum-only fixture data for patroni: two databases and the users that
# own them, each passed through config.lib.forService "patroni".
{ config, lib, ... }:

let
  # Same wrapper is used for both the database and the user definitions.
  forPatroni = config.lib.forService "patroni";

  # Both fixture users get a pgpass-formatted locksmith secret on the
  # haproxy nodes of the patroni service.
  pgpassOnHaproxyNodes = {
    locksmith = {
      nodes = config.services.patroni.nodes.haproxy;
      format = "pgpass";
    };
  };
in
{
  # Only defined when config.simulacrum is set.
  patroni = lib.mkIf config.simulacrum {
    databases = forPatroni {
      testdb = { owner = "testuser"; };
      existingdb = { owner = "existinguser"; };
    };

    users = forPatroni {
      testuser = pgpassOnHaproxyNodes;
      existinguser = pgpassOnHaproxyNodes;
    };
  };
}

View file

@ -1,117 +0,0 @@
# Simulacrum test settings for the patroni service.
# Provides extra per-node defaults plus a Python test script that checks
# cluster bootstrap, recovery after crashes, and databases/users that are
# expected to be reachable through locksmith-provided pgpass files.
{ cluster, ... }:
let
# Cluster name handed to every `patronictl list` call in the test script.
clusterName = "poseidon";
# Link whose ipv4/portStr every psql invocation in the test script targets.
link = cluster.config.links.patroni-pg-access;
in
{
# Node defaults: install jq (used by the script to parse patronictl JSON) and
# the depot postgresql package (presumably the source of the psql client the
# script calls), and add a pg_hba rule that trusts user "postgres" on database
# "postgres" from any IPv4 address so the script can connect without a password.
defaults = { depot, pkgs, ... }: {
environment.systemPackages = [
pkgs.jq
depot.packages.postgresql
];
services.patroni.settings.postgresql.pg_hba = [
"host postgres postgres 0.0.0.0/0 trust"
];
};
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
# The script splits `machines` into patroni workers (nodes) and haproxy nodes
# (clients) by name, then runs these subtests in order: bootstrap,
# insert/select, restart after crashing all nodes, queries while single nodes
# are crashed, creation of testdb/testuser, and take-over of a pre-existing
# existingdb/existinguser after their Consul KV entries are deleted and
# incandescence-patroni.target is restarted.
testScript = ''
import json
nodeNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.worker}')
clientNames = json.loads('${builtins.toJSON cluster.config.services.patroni.nodes.haproxy}')
nodes = [ n for n in machines if n.name in nodeNames ]
clients = [ n for n in machines if n.name in clientNames ]
def booted(nodes):
return filter(lambda node: node.booted, nodes)
def wait_for_all_nodes_ready(expected_replicas=2):
booted_nodes = booted(nodes)
for node in booted_nodes:
node.wait_for_unit("patroni.service")
print(node.succeed("patronictl list ${clusterName}"))
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'length') == {expected_replicas + 1} ]")
node.wait_until_succeeds("[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
node.wait_until_succeeds(f"[ $(patronictl list -f json ${clusterName} | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
print(node.succeed("patronictl list ${clusterName}"))
for client in booted(clients):
client.wait_until_succeeds("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='select 1;'")
def run_dummy_queries():
for client in booted(clients):
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
client.succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
start_all()
with subtest("should bootstrap a new patroni cluster"):
wait_for_all_nodes_ready()
with subtest("should be able to insert and select"):
booted_clients = list(booted(clients))
booted_clients[0].succeed("psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
for client in booted_clients:
client.succeed("test $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
with subtest("should restart after all nodes are crashed"):
for node in nodes:
node.crash()
for node in nodes:
node.start()
wait_for_all_nodes_ready()
with subtest("should be able to run queries while any one node is crashed"):
masterNodeName = nodes[0].succeed("patronictl list -f json ${clusterName} | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
masterNodeIndex = next((i for i, v in enumerate(nodes) if v.name == masterNodeName))
# Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
nodes.append(nodes.pop(masterNodeIndex))
for node in nodes:
node.crash()
wait_for_all_nodes_ready(1)
# Execute some queries while a node is down.
run_dummy_queries()
# Restart crashed node.
node.start()
wait_for_all_nodes_ready()
# Execute some queries with the node back up.
run_dummy_queries()
with subtest("should create databases and users via incandescence"):
for client in clients:
client.succeed(f"PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d testdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
client.fail("PGPASSFILE=/run/locksmith/patroni-testuser psql -h ${link.ipv4} -p ${link.portStr} -U testuser -d postgres --command='select * from dummy;'")
with subtest("should take over existing databases and users via incandescence"):
for cmd in [
"drop database existingdb;",
"drop user existinguser;",
"create database existingdb owner postgres;",
"create user existinguser;"
]:
clients[0].succeed(f"psql -h ${link.ipv4} -p ${link.portStr} -U postgres --command='{cmd}'")
for client in clients:
client.fail(f"PGPASSFILE=/run/locksmith/patroni-existinguser psql -h ${link.ipv4} -p ${link.portStr} -U existinguser -d existingdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
consulConfig = json.loads(clients[0].succeed("cat /etc/consul.json"))
addr = consulConfig["addresses"]["http"]
port = consulConfig["ports"]["http"]
setEnv = f"CONSUL_HTTP_ADDR={addr}:{port}"
clients[0].succeed(f"{setEnv} consul kv delete --recurse services/incandescence/providers/patroni/formulae/database/existingdb")
clients[0].succeed(f"{setEnv} consul kv delete --recurse services/incandescence/providers/patroni/formulae/user/existinguser")
for node in nodes:
node.systemctl("restart incandescence-patroni.target")
clients[0].succeed("[[ $(psql -h ${link.ipv4} -p ${link.portStr} -U postgres --tuples-only --csv --command=\"SELECT pg_roles.rolname FROM pg_database JOIN pg_roles ON pg_database.datdba = pg_roles.oid WHERE pg_database.datname = 'existingdb'\") == existinguser ]]")
for client in clients:
client.succeed(f"PGPASSFILE=/run/locksmith/patroni-existinguser psql -h ${link.ipv4} -p ${link.portStr} -U existinguser -d existingdb --command='create table test_table_{client.name} as select * from generate_series(1, 10) as val;'")
client.fail("PGPASSFILE=/run/locksmith/patroni-existinguser psql -h ${link.ipv4} -p ${link.portStr} -U existinguser -d postgres --command='select * from dummy;'")
'';
}

View file

@ -25,10 +25,6 @@ in
"d '${baseDir}' 0700 patroni patroni - -"
"d '${walDir}' 0700 patroni patroni - -"
];
systemd.services.patroni = {
requires = [ "consul-ready.service" ];
after = [ "consul-ready.service" ];
};
services.patroni = {
enable = true;
name = hostName;
@ -61,7 +57,6 @@ in
};
use_pg_rewind = true;
use_slots = true;
synchronous_mode = true;
authentication = {
replication.username = "patronirep";
rewind.username = "patronirew";
@ -72,8 +67,6 @@ in
wal_level = "replica";
hot_standby_feedback = "on";
unix_socket_directories = "/tmp";
synchronous_commit = "on";
wal_keep_size = 2048;
};
pg_hba = [
"host replication patronirep ${net} scram-sha-256"

View file

@ -1,4 +1,4 @@
{ config, depot, ... }:
{ depot, ... }:
{
services.sso = {
@ -18,12 +18,4 @@
login.target = ssoAddr;
account.target = ssoAddr;
};
patroni = config.lib.forService "sso" {
databases.keycloak = {};
users.keycloak.locksmith = {
nodes = config.services.sso.nodes.host;
format = "raw";
};
};
}

View file

@ -8,10 +8,12 @@ in
{
links.keycloak.protocol = "http";
services.locksmith.waitForSecrets.keycloak = [
"patroni-keycloak"
];
age.secrets.keycloak-dbpass = {
file = ../../../secrets/keycloak-dbpass.age;
owner = "root";
group = "root";
mode = "0400";
};
services.nginx.virtualHosts = {
"${login}" = lib.recursiveUpdate (vhosts.proxy kc.url) {
locations = {
@ -34,7 +36,7 @@ in
host = patroni.ipv4;
inherit (patroni) port;
useSSL = false;
passwordFile = "/run/locksmith/patroni-keycloak";
passwordFile = config.age.secrets.keycloak-dbpass.path;
};
settings = {
http-host = kc.ipv4;
@ -52,7 +54,7 @@ in
"-Dotel.traces.exporter=otlp"
];
OTEL_EXPORTER_OTLP_PROTOCOL = "grpc";
OTEL_EXPORTER_OTLP_ENDPOINT = cluster.config.ways.ingest-traces-otlp.url;
OTEL_EXPORTER_OTLP_ENDPOINT = cluster.config.links.tempo-otlp-grpc.url;
OTEL_TRACES_SAMPLER = "parentbased_traceidratio";
OTEL_TRACES_SAMPLER_ARG = "0.50";
};

View file

@ -39,6 +39,7 @@ in
./garage.nix
./garage-options.nix
./garage-layout.nix
./incandescence-ascensions.nix
] ++ lib.optionals config.simulacrum [
./simulacrum/snakeoil-rpc-secret.nix
];
@ -56,8 +57,8 @@ in
};
simulacrum = {
enable = true;
deps = [ "wireguard" "consul" "locksmith" "dns" "incandescence" ];
settings = ./simulacrum/test.nix;
deps = [ "consul" "locksmith" "incandescence" "patroni" "ways" ];
settings = ./test.nix;
};
};

View file

@ -36,9 +36,10 @@ in
inherit (linkWeb) port;
checks = [
{
name = "Garage Service Status";
id = "service:garage-web:garage";
alias_service = "garage";
name = "Garage Node";
id = "service:garage-web:node";
interval = "5s";
http = "${config.links.garageMetrics.url}/health";
}
];
};

View file

@ -179,13 +179,9 @@ in
key = {
destroyAfterDays = 0;
create = key: ''
if [[ "$(garage key info ${lib.escapeShellArg key} 2>&1 >/dev/null)" == "Error: 0 matching keys" ]]; then
# don't print secret key
garage key new --name ${lib.escapeShellArg key} >/dev/null
echo Key ${lib.escapeShellArg key} was created.
else
echo "Key already exists, assuming ownership"
fi
# don't print secret key
garage key new --name ${lib.escapeShellArg key} >/dev/null
echo Key ${lib.escapeShellArg key} was created.
'';
destroy = ''
garage key delete --yes "$OBJECT"
@ -200,11 +196,7 @@ in
deps = [ "key" ];
destroyAfterDays = 30;
create = bucket: ''
if [[ "$(garage bucket info ${lib.escapeShellArg bucket} 2>&1 >/dev/null)" == "Error: Bucket not found" ]]; then
garage bucket create ${lib.escapeShellArg bucket}
else
echo "Bucket already exists, assuming ownership"
fi
garage bucket create ${lib.escapeShellArg bucket}
'';
destroy = ''
garage bucket delete --yes "$OBJECT"

View file

@ -0,0 +1,18 @@
# Ascension that records every bucket and key garage currently lists as
# "alive" in Consul KV, under the incandescence garage provider's formulae
# prefix.
{ config, lib, ... }:
{
system.ascensions = {
# Only defined when an incandescence provider named "garage" is configured.
incandescence-garage = lib.mkIf (config.services.incandescence.providers ? garage) {
incantations = i: [
# For each listing the script drops the first output line (tail -n +2),
# takes one space-separated column as the object name (field 3 for buckets,
# field 5 for keys) and passes it as "$1" to the runConsul snippet, which
# writes .../<name>/alive = true.
# NOTE(review): the column numbers depend on the exact table layout printed
# by `garage bucket list` / `garage key list` -- confirm against the garage
# version in use.
(i.runGarage /*bash*/ ''
garage bucket list | tail -n +2 | cut -d' ' -f3 | while read bucket; do
${i.runConsul /*bash*/ ''consul kv put "services/incandescence/providers/garage/formulae/bucket/$1/alive" true''} "$bucket"
done
garage key list | tail -n +2 | cut -d' ' -f5 | while read key; do
${i.runConsul /*bash*/ ''consul kv put "services/incandescence/providers/garage/formulae/key/$1/alive" true''} "$key"
done
'')
];
};
};
}

View file

@ -3,17 +3,12 @@
{
imports = [
./options
./simulacrum/test-data.nix
];
services.ways = {
nodes.host = config.services.websites.nodes.host;
nixos.host = ./host.nix;
simulacrum = {
enable = true;
deps = [ "nginx" "acme-client" "dns" "certificates" "consul" ];
settings = ./simulacrum/test.nix;
};
simulacrum.deps = [ "nginx" "acme-client" "dns" "certificates" "consul" ];
};
dns.records = lib.mapAttrs'

View file

@ -25,14 +25,7 @@ in
];
locations = lib.mkMerge [
{
"/" = if cfg.grpc then {
extraConfig = ''
set $nix_proxy_grpc_target ${cfg.target};
grpc_pass $nix_proxy_grpc_target;
'';
} else {
proxyPass = cfg.target;
};
"/".proxyPass = cfg.target;
"${cfg.healthCheckPath}".extraConfig = "access_log off;";
}
{

View file

@ -35,12 +35,6 @@ with lib;
};
};
grpc = mkOption {
description = "Whether this endpoint is a gRPC service.";
type = types.bool;
default = false;
};
target = mkOption {
type = types.str;
};
@ -107,7 +101,7 @@ with lib;
(lib.mkIf options.consulService.isDefined {
useConsul = true;
nginxUpstreamName = "ways_upstream_${builtins.hashString "md5" options.consulService.value}";
target = "${if config.grpc then "grpc" else "http"}://${options.nginxUpstreamName.value}";
target = "http://${options.nginxUpstreamName.value}";
})
(lib.mkIf options.bucket.isDefined {
consulService = "garage-web";

View file

@ -1,11 +0,0 @@
# Simulacrum-only fixture endpoints for the ways service: one with a fixed
# target and one resolved through a Consul service.
{ config, lib, ... }:

let
  # Both endpoints are passed through the same forService wrapper.
  forWays = config.lib.forService "ways";
in
{
  # Only defined when config.simulacrum is set.
  ways = lib.mkIf config.simulacrum {
    ways-test-simple = forWays { target = "http://nowhere"; };
    ways-test-consul = forWays { consulService = "ways-test-service"; };
  };
}

View file

@ -1,55 +0,0 @@
# Simulacrum test settings for the ways service.
# Adds a backend on the "nowhere" node, wires every ways host to it (once as
# a plain nginx upstream, once as a Consul service), and curls both test
# endpoints over HTTPS.
{ cluster, config, lib, ... }:
let
# Domain under which the ways-test-* endpoints are requested by the test script.
inherit (cluster._module.specialArgs.depot.lib.meta) domain;
in
{
nodes = lib.mkMerge [
{
# Backend node: darkhttpd serving a webroot that contains a single
# example.txt, listening on TCP port 8080 (opened in the firewall).
nowhere = { pkgs, ... }: {
networking.firewall.allowedTCPPorts = [ 8080 ];
systemd.services.ways-simple-service = let
webroot = pkgs.writeTextDir "example.txt" "hello world";
in {
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.darkhttpd}/bin/darkhttpd ${webroot} --port 8080";
DynamicUser = true;
};
};
};
}
# Every ways host gets an nginx upstream named "nowhere" and a Consul service
# "ways-test-service", both pointing at the first eth1 IPv4 address of the
# nowhere node on port 8080.
(lib.genAttrs cluster.config.services.ways.nodes.host (lib.const {
services.nginx.upstreams.nowhere.servers = {
"${(builtins.head config.nodes.nowhere.networking.interfaces.eth1.ipv4.addresses).address}:8080" = {};
};
consul.services.ways-test-service = {
unit = "consul";
mode = "external";
definition = {
name = "ways-test-service";
address = (builtins.head config.nodes.nowhere.networking.interfaces.eth1.ipv4.addresses).address;
port = 8080;
};
};
}))
];
# Waits for multi-user.target on the nowhere node and on every ways host,
# then requests both test endpoints from the nowhere node with `curl -f`.
testScript = ''
import json
nodeNames = json.loads('${builtins.toJSON cluster.config.services.ways.nodes.host}')
nodes = [ n for n in machines if n.name in nodeNames ]
start_all()
nowhere.wait_for_unit("multi-user.target")
for node in nodes:
node.wait_for_unit("multi-user.target")
with subtest("single-target service"):
nowhere.succeed("curl -f https://ways-test-simple.${domain}")
with subtest("consul-managed service"):
nowhere.succeed("curl -f https://ways-test-consul.${domain}")
'';
}

View file

@ -93,9 +93,5 @@ in
nodes = config.services.wireguard.nodes.mesh;
shared = false;
};
simulacrum = {
enable = true;
settings = ./test.nix;
};
};
}

View file

@ -1,28 +0,0 @@
# Simulacrum test settings for the wireguard mesh.
# The Python test script is generated at evaluation time from the list of
# mesh nodes in cluster.config.services.wireguard.nodes.mesh.
{ cluster, lib, ... }:
{
# Declares services.locksmith as a sink for otherwise-undeclared definitions.
# NOTE(review): presumably needed because locksmith is not among this test's
# dependencies -- confirm.
defaults.options.services.locksmith = lib.mkSinkUndeclaredOptions { };
# Generated script, in two passes over the mesh nodes:
#   1. wait for wireguard-wgmesh.target on every node;
#   2. one subtest per node, pinging (3 packets) the meshIp of each node
#      returned by otherNodes.mesh for it.
testScript = ''
start_all()
${lib.pipe cluster.config.services.wireguard.nodes.mesh [
(map (node: /*python*/ ''
${node}.wait_for_unit("wireguard-wgmesh.target")
''))
(lib.concatStringsSep "\n")
]}
${lib.pipe cluster.config.services.wireguard.nodes.mesh [
(map (node: /*python*/ ''
with subtest("${node} can reach all other nodes"):
${lib.pipe (cluster.config.services.wireguard.otherNodes.mesh node) [
(map (peer: /*python*/ ''
${node}.succeed("ping -c3 ${cluster.config.hostLinks.${peer}.mesh.extra.meshIp}")
''))
(lib.concatStringsSep "\n ")
]}
''))
(lib.concatStringsSep "\n")
]}
'';
}

View file

@ -26,13 +26,11 @@ in
cfg = v.distributed;
svcs = map (x: config.consul.services.${x}) cfg.registerServices;
svc = config.consul.services.${cfg.registerService};
runWithRegistration = pkgs.writeShellScript "run-with-registration" ''
trap '${lib.concatStringsSep ";" (map (svc: svc.commands.deregister) svcs)}' EXIT
${lib.concatStringsSep "\n" (
map (svc: svc.commands.register) svcs
)}
trap '${svc.commands.deregister}' EXIT
${svc.commands.register}
''${@}
'';
@ -51,10 +49,10 @@ in
[Service]
ExecStartPre=${waitForConsul} 'services/${n}%i'
ExecStart=
ExecStart=${consul}/bin/consul lock --name=${n} --n=${toString cfg.replicas} --shell=false --child-exit-code 'services/${n}%i' ${optionalString (cfg.registerServices != []) runWithRegistration} ${ExecStart}
ExecStart=${consul}/bin/consul lock --name=${n} --n=${toString cfg.replicas} --shell=false --child-exit-code 'services/${n}%i' ${optionalString (cfg.registerService != null) runWithRegistration} ${ExecStart}
Environment="CONSUL_HTTP_ADDR=${consulHttpAddr}"
${optionalString (v.serviceConfig ? RestrictAddressFamilies) "RestrictAddressFamilies=AF_NETLINK"}
${optionalString (cfg.registerServices != []) (lib.concatStringsSep "\n" (map (svc: "ExecStopPost=${svc.commands.deregister}") svcs))}
${optionalString (cfg.registerService != null) "ExecStopPost=${svc.commands.deregister}"}
''))
];
}

View file

@ -1,4 +1,4 @@
{ cluster, config, depot, pkgs, ... }:
{ config, depot, pkgs, ... }:
{
users.motd = builtins.readFile ./motd.txt;
environment.interactiveShellInit = let
@ -8,11 +8,6 @@
grep = exec pkgs.gnugrep "grep";
countUsers = '' ${util "who"} -q | ${util "head"} -n1 | ${util "tr"} ' ' \\n | ${util "uniq"} | ${util "wc"} -l'';
countSessions = '' ${util "who"} -q | ${util "head"} -n1 | ${util "wc"} -w'';
rev = if cluster.config.simulacrum then
"simulacrum"
else
depot.rev or "\${BRED}()\${CO}\${BWHITE} Dirty";
in ''
(
# Reset colors
@ -45,7 +40,7 @@
echo -e " ''${BGREEN}()''${CO} ''${BWHITE}You are using a genuine Private Void system.''${CO}"
echo " "
echo -e " ''${BWHITE}OS Version....:''${CO} NixOS ''${CAB}${config.system.nixos.version}''${CO}"
echo -e " ''${BWHITE}Configuration.:''${CO} ''${CAB}${rev}''${CO}"
echo -e " ''${BWHITE}Configuration.:''${CO} ''${CAB}${depot.rev or "\${BRED}()\${CO}\${BWHITE} Dirty"}''${CO}"
echo -e " ''${BWHITE}Uptime........:''${CO} $(${uptime} -p | ${util "cut"} -d ' ' -f2- | GREP_COLORS='mt=01;35' ${grep} --color=always '[0-9]*')"
echo -e " ''${BWHITE}SSH Logins....:''${CO} There are currently ''${CAB}$(${countUsers})''${CO} users logged in on ''${CAB}$(${countSessions})''${CO} sessions"
)

View file

@ -29,6 +29,7 @@ in
reflection = ./reflection;
shell-config = ./shell-config;
ssh = ./ssh;
system-info = ./system-info;
system-recovery = ./system-recovery;
systemd-extras = ./systemd-extras;
tested = ./tested;
@ -55,6 +56,7 @@ in
motd
networking
nix-config-server
system-info
system-recovery
tested
];

View file

@ -0,0 +1,5 @@
# Exposes the depot revision as the NixOS configuration revision.
{ depot, ... }:

{
  system = {
    # depot.rev when that attribute exists, otherwise null.
    configurationRevision = depot.rev or null;
  };
}

View file

@ -17,11 +17,6 @@ with lib;
type = with types; nullOr str;
default = null;
};
registerServices = mkOption {
description = "Consul services to register when this service gets started.";
type = with types; listOf str;
default = if config.distributed.registerService == null then [ ] else [ config.distributed.registerService ];
};
};
}));
};

View file

@ -7,6 +7,7 @@ let
in
{
debug = lib.warn "debug mode is enabled" true;
perSystem = { filters, pkgs, self', system, ... }: {
checks = lib.mkIf (system == "x86_64-linux") {
ascensions = pkgs.callPackage ./ascensions.nix {
@ -29,6 +30,11 @@ in
inherit (self'.packages) keycloak;
};
patroni = pkgs.callPackage ./patroni.nix {
inherit (self) nixosModules;
inherit (self'.packages) postgresql;
};
s3ql-upgrade = pkgs.callPackage ./s3ql-upgrade.nix {
inherit (self'.packages) s3ql;
inherit (self) nixosModules;

211
packages/checks/patroni.nix Normal file
View file

@ -0,0 +1,211 @@
{ nixosTest, nixosModules, postgresql }:
# taken from https://github.com/phfroidmont/nixpkgs/blob/patroni-module/nixos/tests/patroni.nix
nixosTest (
let
# Addresses of the three patroni nodes: 192.168.1.1 through 192.168.1.3.
nodesIps = map (lastOctet: "192.168.1.${toString lastOctet}") [ 1 2 3 ];
# createNode: builds the NixOS module for one patroni node.
#   index - 0-based position of the node in nodesIps; the node is named
#           "node<index + 1>" and gets the address at that position.
# Returns a NixOS module function ({ pkgs, ... }: { ... }).
createNode = index: { pkgs, ... }:
  let
    ip = builtins.elemAt nodesIps index; # since we already use IPs to identify servers
  in
  {
    imports = [
      nixosModules.patroni
      nixosModules.systemd-extras
    ];

    # Force the test address onto eth1, overriding any other definition.
    networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
      { address = ip; prefixLength = 16; }
    ];

    networking.firewall.allowedTCPPorts = [ 5432 8008 5010 ];

    # jq is used by the test script to parse `patronictl list -f json`.
    environment.systemPackages = [ pkgs.jq ];

    services.patroni = {
      enable = true;
      postgresqlPackage = postgresql.withPackages (p: [ p.pg_safeupdate ]);
      scope = "cluster1";
      name = "node${toString (index + 1)}";
      nodeIp = ip;
      # Every address in nodesIps except this node's own.
      otherNodesIps = builtins.filter (h: h != ip) nodesIps;
      softwareWatchdog = true;

      settings = {
        bootstrap = {
          dcs = {
            ttl = 30;
            loop_wait = 10;
            retry_timeout = 10;
            maximum_lag_on_failover = 1048576;
          };
          initdb = [
            { encoding = "UTF8"; }
            "data-checksums"
          ];
        };

        postgresql = {
          use_pg_rewind = true;
          use_slots = true;
          authentication = {
            replication = {
              username = "replicator";
            };
            superuser = {
              username = "postgres";
            };
            rewind = {
              username = "rewind";
            };
          };
          parameters = {
            listen_addresses = ip;
            wal_level = "replica";
            hot_standby_feedback = "on";
            unix_socket_directories = "/tmp";
          };
          pg_hba = [
            "host replication replicator 192.168.1.0/24 md5"
            # Unsafe, do not use for anything other than tests
            "host all all 0.0.0.0/0 trust"
          ];
        };

        # The etcd node defined in `nodes` listens on this address.
        etcd3 = {
          host = "192.168.1.4:2379";
        };
      };

      environmentFiles = {
        PATRONI_REPLICATION_PASSWORD = pkgs.writeText "replication-password" "postgres";
        PATRONI_SUPERUSER_PASSWORD = pkgs.writeText "superuser-password" "postgres";
        PATRONI_REWIND_PASSWORD = pkgs.writeText "rewind-password" "postgres";
      };
    };

    # We always want to restart so the tests never hang.
    # StartLimitIntervalSec= is a [Unit] setting; placed in [Service] (via
    # serviceConfig) systemd ignores it as an unknown key, so it has to be
    # emitted through unitConfig to actually disable start rate limiting.
    systemd.services.patroni.unitConfig.StartLimitIntervalSec = 0;
  };
in
{
name = "patroni";
# Test machines: three patroni nodes built by createNode, one etcd node and
# one client node.
nodes = {
node1 = createNode 0;
node2 = createNode 1;
node3 = createNode 2;
# etcd instance at 192.168.1.4:2379 -- the address the patroni nodes' etcd3.host
# setting points at.
etcd = { pkgs, ... }: {
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
{ address = "192.168.1.4"; prefixLength = 16; }
];
services.etcd = {
enable = true;
listenClientUrls = [ "http://192.168.1.4:2379" ];
};
networking.firewall.allowedTCPPorts = [ 2379 ];
};
# Client machine: has the postgresql package installed and runs haproxy bound
# to 127.0.0.1:5432. The generated `server` lines forward to port 5432 of
# every address in nodesIps and health-check each one over HTTP on port 8008,
# expecting status 200.
client = { pkgs, ... }: {
environment.systemPackages = [ postgresql ];
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
{ address = "192.168.2.1"; prefixLength = 16; }
];
services.haproxy = {
enable = true;
config = ''
global
maxconn 100
defaults
log global
mode tcp
retries 2
timeout client 30m
timeout connect 4s
timeout server 30m
timeout check 5s
listen cluster1
bind 127.0.0.1:5432
option httpchk
http-check expect status 200
default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
${builtins.concatStringsSep "\n" (map (ip: "server postgresql_${ip}_5432 ${ip}:5432 maxconn 100 check port 8008") nodesIps)}
'';
};
};
};
# Python test script. Subtests, in order: bootstrap a new cluster, insert and
# select through the client's haproxy (127.0.0.1), restart after crashing all
# nodes, and run queries while each node in turn is crashed and restarted.
# The current leader is moved to the end of the node list before the last
# subtest; its list index is derived from the trailing digit of its member
# name (node1 -> 0, node2 -> 1, node3 -> 2).
testScript = ''
nodes = [node1, node2, node3]
def wait_for_all_nodes_ready(expected_replicas=2):
booted_nodes = filter(lambda node: node.booted, nodes)
for node in booted_nodes:
print(node.succeed("patronictl list cluster1"))
node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'length') == {expected_replicas + 1} ]")
node.wait_until_succeeds("[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Leader$\"))) | map(select(.State | test(\"^running$\"))) | length') == 1 ]")
node.wait_until_succeeds(f"[ $(patronictl list -f json cluster1 | jq 'map(select(.Role | test(\"^Replica$\"))) | map(select(.State | test(\"^streaming$\"))) | length') == {expected_replicas} ]")
print(node.succeed("patronictl list cluster1"))
client.wait_until_succeeds("psql -h 127.0.0.1 -U postgres --command='select 1;'")
def run_dummy_queries():
client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='insert into dummy(val) values (101);'")
client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select val from dummy where val = 101;') -eq 101")
client.succeed("psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='delete from dummy where val = 101;'")
start_all()
with subtest("should bootstrap a new patroni cluster"):
wait_for_all_nodes_ready()
with subtest("should be able to insert and select"):
client.succeed("psql -h 127.0.0.1 -U postgres --command='create table dummy as select * from generate_series(1, 100) as val;'")
client.succeed("test $(psql -h 127.0.0.1 -U postgres --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100")
with subtest("should restart after all nodes are crashed"):
for node in nodes:
node.crash()
for node in nodes:
node.start()
wait_for_all_nodes_ready()
with subtest("should be able to run queries while any one node is crashed"):
masterNodeName = node1.succeed("patronictl list -f json cluster1 | jq '.[] | select(.Role | test(\"^Leader$\")) | .Member' -r").strip()
masterNodeIndex = int(masterNodeName[len(masterNodeName)-1]) - 1
# Move master node at the end of the list to avoid multiple failovers (makes the test faster and more consistent)
nodes.append(nodes.pop(masterNodeIndex))
for node in nodes:
node.crash()
wait_for_all_nodes_ready(1)
# Execute some queries while a node is down.
run_dummy_queries()
# Restart crashed node.
node.start()
wait_for_all_nodes_ready()
# Execute some queries with the node back up.
run_dummy_queries()
'';
})

View file

@ -10,9 +10,6 @@ testers.runNixOSTest {
nixosModules.systemd-extras
./modules/nixos/age-dummy-secrets
./modules/nixos/age-dummy-secrets/options.nix
{
options.services.locksmith = lib.mkSinkUndeclaredOptions { };
}
];
_module.args.depot.packages = { inherit (previous.packages.${system}) s3ql; };

View file

@ -11,6 +11,8 @@
nix-super = packages.nix-super.nix;
agenix = packages.agenix.agenix.override { nix = nix-super; };
hci = packages.hercules-ci-agent.hercules-ci-cli;
};
};
}

View file

@ -59,6 +59,7 @@
tools = with flakePkgs; [
agenix
graf
hci
npins
pin
void

View file

@ -1,5 +1,6 @@
{
packages = {
hci = [ "x86_64-linux" ];
hydra = [ "x86_64-linux" ];
jellyfin = [ "x86_64-linux" ];
keycloak = [ "x86_64-linux" ];

BIN
secrets/keycloak-dbpass.age Normal file

Binary file not shown.