WIP: Consul ACLs #117

Draft
max wants to merge 8 commits from pr-consul-acl into master
7 changed files with 111 additions and 11 deletions

View file

@ -28,6 +28,35 @@ in
bootstrap_expect = builtins.length cfg.nodes.agent;
addresses.http = config.links.consulAgent.ipv4;
ports.http = config.links.consulAgent.port;
acl = {
enabled = true;
default_policy = "deny";
};
};
};
systemd.services = {
consul.serviceConfig.Type = "notify";
consul-load-smt = {
wantedBy = [ "consul.service" ];
after = [ "consul.service" ];
environment.CONSUL_HTTP_ADDR = config.links.consulAgent.tuple;
path = [
config.services.consul.package
];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
};
script = ''
while ! test -e /run/locksmith/consul-systemManagementToken; do
echo Waiting for System Management Token
systemctl start locksmith.service
Review

Find a way to make Locksmith work better while bootstrapping. Polling like this is pretty fucked up. Maybe there should be a proper "Consul is ready without SMT" level and Locksmith and the Chant Listener should be configured to work with that level.

Find a way to make Locksmith work better while bootstrapping. Polling like this is pretty fucked up. Maybe there should be a proper "Consul is ready without SMT" level and Locksmith and the Chant Listener should be configured to work with that level.
sleep 5
done
export CONSUL_HTTP_TOKEN_FILE=/run/locksmith/consul-systemManagementToken
consul acl set-agent-token default "$(< /run/locksmith/consul-systemManagementToken)" # TODO: don't leak token on cmdline
Review

Setting the default token means it's no longer required to present a token when connecting to this agent. Should probably limit this to automatic actions only (dns, replication token types?)

Setting the default token means it's no longer required to present a token when connecting to this agent. Should probably limit this to automatic actions only (`dns`, `replication` token types?)
'';
};
};

View file

@ -0,0 +1,65 @@
{ cluster, config, lib, pkgs, ... }:
let
sentinelFile = "/var/lib/consul/nixos-acl-bootstrapped";
bootstrapTokenFile = "/run/keys/consul-bootstrap-token";
bootstrapConfig = "consul-bootstrap-config.json";
writeRules = rules: pkgs.writeText "consul-policy.json" (builtins.toJSON rules);
in
{
systemd.services = {
consul-acl-bootstrap = {
requires = [ "consul.service" ];
after = [ "consul.service" ];
wantedBy = [ "multi-user.target" ];
unitConfig.ConditionPathExists = "!${sentinelFile}";
serviceConfig = {
Type = "oneshot";
PrivateTmp = true;
};
environment.CONSUL_HTTP_ADDR = config.links.consulAgent.tuple;
path = [
config.services.consul.package
pkgs.jq
];
script = ''
umask 77
if consul acl bootstrap --format=json > ${bootstrapConfig}; then
echo Bootstrapping:
jq -r .SecretID < ${bootstrapConfig} > ${bootstrapTokenFile}
export CONSUL_HTTP_TOKEN_FILE=${bootstrapTokenFile}
consul acl policy create --name operator-read --description "Read-only operator actions" --rules @${writeRules { operator = "read"; }}
consul acl policy create --name smt-read --description "Allow reading the encrypted system management token" --rules @${writeRules { key_prefix."secrets/locksmith/consul-systemManagementToken/".policy = "read"; }}
consul acl token update --id 00000000-0000-0000-0000-000000000002 --append-policy-name operator-read --append-policy-name smt-read
else
echo Bootstrap is already in progress elsewhere.
touch ${sentinelFile}
fi
'';
};
locksmith-provider-consul = {
unitConfig.ConditionPathExists = bootstrapTokenFile;
distributed.enable = lib.mkForce false;
environment = {
CONSUL_HTTP_ADDR = config.links.consulAgent.tuple;
CONSUL_HTTP_TOKEN_FILE = bootstrapTokenFile;
};
postStop = ''
rm -f ${bootstrapTokenFile}
touch ${sentinelFile}
'';
};
};
services.locksmith.providers.consul = {
wantedBy = [ "consul-acl-bootstrap.service" ];
after = [ "consul-acl-bootstrap.service" ];
secrets.systemManagementToken = {
nodes = cluster.config.services.consul.nodes.agent;
checkUpdate = "test -e ${bootstrapTokenFile}";
command = "cat ${bootstrapTokenFile}";
};
};
}

View file

@ -14,6 +14,7 @@ in
nodes = {
agent = [ "checkmate" "grail" "thunderskin" "VEGAS" "prophet" ];
ready = config.services.consul.nodes.agent;
bootstrap = [ "grail" "VEGAS" ];
};
nixos = {
agent = [
@ -21,10 +22,11 @@ in
./remote-api.nix
];
ready = ./ready.nix;
bootstrap = ./bootstrap.nix;
};
simulacrum = {
enable = true;
deps = [ "wireguard" ];
deps = [ "wireguard" "locksmith" ];
settings = ./test.nix;
};
};

View file

@ -51,4 +51,9 @@ in
Type = "oneshot";
};
};
systemd.targets.consul-ready = {
Review

This entire thing is ugly

This entire thing is ugly
description = "Consul is Ready";
requires = [ "consul-ready.service" ] ++ lib.optional config.services.consul.enable "consul-load-smt.service";
};
}

View file

@ -1,8 +1,4 @@
{ lib, ... }:
{
defaults.options.services.locksmith = lib.mkSinkUndeclaredOptions { };
testScript = ''
import json
@ -11,12 +7,12 @@
with subtest("should form cluster"):
nodes = [ n for n in machines if n != nowhere ]
for machine in nodes:
machine.succeed("systemctl start consul-ready.service")
machine.succeed("systemctl start consul-ready.target")
Review

ugly

ugly
for machine in nodes:
consulConfig = json.loads(machine.succeed("cat /etc/consul.json"))
addr = consulConfig["addresses"]["http"]
port = consulConfig["ports"]["http"]
setEnv = f"CONSUL_HTTP_ADDR={addr}:{port}"
setEnv = f"CONSUL_HTTP_ADDR={addr}:{port} CONSUL_HTTP_TOKEN_FILE=/run/locksmith/consul-systemManagementToken"
memberList = machine.succeed(f"{setEnv} consul members --status=alive")
for machine2 in nodes:
assert machine2.name in memberList

View file

@ -43,14 +43,15 @@ in
hasSpecialPrefix = elem (substring 0 1 ExecStart) [ "@" "-" ":" "+" "!" ];
in assert !hasSpecialPrefix; pkgs.writeTextDir "etc/systemd/system/${n}.service.d/distributed.conf" ''
[Unit]
Requires=consul-ready.service
After=consul-ready.service
Requires=consul-ready.target
After=consul-ready.target
Review

ugly

ugly
[Service]
ExecStartPre=${waitForConsul} 'services/${n}%i'
ExecStart=
ExecStart=${consul}/bin/consul lock --name=${n} --n=${toString cfg.replicas} --shell=false --child-exit-code 'services/${n}%i' ${optionalString (cfg.registerService != null) runWithRegistration} ${ExecStart}
Environment="CONSUL_HTTP_ADDR=${consulHttpAddr}"
Environment="CONSUL_HTTP_TOKEN_FILE=/run/locksmith/consul-systemManagementToken"
${optionalString (v.serviceConfig ? RestrictAddressFamilies) "RestrictAddressFamilies=AF_NETLINK"}
${optionalString (cfg.registerService != null) "ExecStopPost=${svc.commands.deregister}"}
''))

View file

@ -12,6 +12,7 @@ let
consulRegisterScript = pkgs.writeShellScript "consul-register" ''
export CONSUL_HTTP_ADDR='${consulHttpAddr}'
export CONSUL_HTTP_TOKEN_FILE=/run/locksmith/consul-systemManagementToken
Review

This should definitely be statically optional, i.e. can disable ACL support in this module.

This should probably be dynamically optional, i.e. check for the existence of the token file first. This could be useful for registering and/or running distributed Consul services before the ACL bootstrap is complete.

This should definitely be statically optional, i.e. can disable ACL support in this module. This should probably be *dynamically* optional, i.e. check for the existence of the token file first. This could be useful for registering and/or running distributed Consul services before the ACL bootstrap is complete.
while ! ${consul} services register "$1"; do
sleep 1
done
@ -19,6 +20,7 @@ let
consulDeregisterScript = pkgs.writeShellScript "consul-deregister" ''
export CONSUL_HTTP_ADDR='${consulHttpAddr}'
export CONSUL_HTTP_TOKEN_FILE=/run/locksmith/consul-systemManagementToken
for i in {1..5}; do
if ${consul} services deregister "$1"; then
break
@ -81,8 +83,8 @@ let
}.${mode};
value = {
direct = {
after = [ "consul-ready.service" ];
requires = [ "consul-ready.service" ];
after = [ "consul-ready.target" ];
requires = [ "consul-ready.target" ];
Review

ugly

ugly
serviceConfig = {
ExecStartPost = register servicesJson;
ExecStopPost = deregister servicesJson;