cluster/services/hercules-ci-multi-agent: use kranzes' refactored modules

This commit is contained in:
Max Headroom 2023-10-26 01:35:31 +02:00
parent ea29ed2375
commit 2335305284
7 changed files with 377 additions and 1 deletions

View file

@ -17,7 +17,7 @@ let
in
{
imports = [
depot.inputs.hercules-ci-agent.nixosModules.multi-agent-service
./modules/multi-agent-refactored
];
age.secrets = mergeMap (name: _: {

View file

@ -0,0 +1,148 @@
{ config, lib, pkgs, ... }:
{
options = {
# One attribute per agent instance. Each value is a submodule whose
# options are defined in ./options.nix.
services.hercules-ci-agents = lib.mkOption {
  description = lib.mdDoc "Hercules CI Agent instances.";
  type =
    let
      agentModule = import ./options.nix { inherit config lib pkgs; };
    in
    lib.types.attrsOf (lib.types.submodule agentModule);
  default = { };
  example = {
    agent1 = { enable = true; };
    agent2 = {
      enable = true;
      settings = { labels = { myMetadata = "agent2"; }; };
    };
  };
};
};
config =
let
# Apply `f name agent` to every declared agent and merge the results.
# The contribution of an agent whose `enable` is false is dropped via `mkIf`.
forAllAgents = f:
  let
    agents = config.services.hercules-ci-agents;
    perAgent = name: agent: lib.mkIf agent.enable (f name agent);
  in
  lib.mkMerge (lib.mapAttrsToList perAgent agents);
in
{
# System user and group for each enabled agent.
users = forAllAgents (name: agent: {
  groups = { ${agent.group} = { }; };
  users = {
    ${agent.user} = {
      isSystemUser = true;
      group = agent.group;
      description = "Hercules CI Agent system user for ${name}";
    };
  };
});
systemd = forAllAgents (name: agent:
let
command = "${lib.getExe agent.package} --config ${agent.tomlFile}";
testCommand = "${command} --test-configuration";
in
{
# Ensure the work directory exists: mode 0700, owned by the agent's user and group.
tmpfiles.rules = [
  "d ${agent.settings.workDirectory} 0700 ${agent.user} ${agent.group} - -"
];
# Main systemd service for this agent instance.
services."hercules-ci-agent-${name}" =
  let
    networkOnline = [ "network-online.target" ];
  in
  {
    wantedBy = [ "multi-user.target" ];
    wants = networkOnline;
    after = networkOnline;
    startLimitBurst = 30 * 1000000; # practically infinite
    serviceConfig = {
      # Identity and working directory.
      User = agent.user;
      Group = agent.group;
      UMask = "077";
      WorkingDirectory = agent.settings.workDirectory;

      # Run the configuration test before each start; restart on failure.
      ExecStartPre = testCommand;
      ExecStart = command;
      Restart = "on-failure";
      RestartSec = 120;

      # If a worker goes OOM, don't kill the main process. It needs to
      # report the failure and it's unlikely to be part of the problem.
      OOMPolicy = "continue";

      # Work around excessive stack use by libstdc++ regex
      # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
      # A 256 MiB stack allows between 400 KiB and 1.5 MiB file to be matched by ".*".
      LimitSTACK = 256 * 1024 * 1024;

      # Hardening: privileges and capabilities.
      CapabilityBoundingSet = "";
      NoNewPrivileges = true;
      LockPersonality = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      RestrictAddressFamilies = [ "AF_UNIX" "AF_INET" "AF_INET6" ];

      # Hardening: private namespaces and devices.
      DeviceAllow = "";
      PrivateDevices = true;
      PrivateMounts = true;
      PrivateTmp = true;
      PrivateUsers = true;
      RemoveIPC = true;

      # Hardening: protection of host state.
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectSystem = "full";
    };
  };
# Changes in the secrets do not affect the unit in any way that would cause
# a restart, which is currently necessary to reload the secrets.
# This path unit watches the secret files and activates the restarter
# service when they change. `Unit` must name the service exactly as it is
# defined in this module, i.e. `hercules-ci-agent-restarter-<name>`;
# otherwise the path unit points at a unit that does not exist and the
# agent is never restarted.
paths."hercules-ci-agent-${name}-restart-files" = {
  # Start watching whenever the agent service itself is started.
  wantedBy = [ "hercules-ci-agent-${name}.service" ];
  pathConfig = {
    Unit = "hercules-ci-agent-restarter-${name}.service";
    PathChanged = [ agent.settings.clusterJoinTokenPath agent.settings.binaryCachesPath ];
  };
};
# One-shot helper that restarts this agent's main service, provided the
# service is currently active and the configuration passes `testCommand`
# (the agent command line with `--test-configuration` appended).
# Otherwise it only prints a message to stderr and leaves the service alone.
services."hercules-ci-agent-restarter-${name}" = {
serviceConfig.Type = "oneshot";
# The script body is shell text; keep its contents in sync with the unit
# names used for the main service.
script = ''
# Wait a bit, with the effect of bundling up file changes into a single
# run of this script and hopefully a single restart.
sleep 10
if systemctl is-active --quiet 'hercules-ci-agent-${name}.service'; then
if ${testCommand}; then
systemctl restart 'hercules-ci-agent-${name}.service'
else
echo 1>&2 'WARNING: Not restarting hercules-ci-agent-${name} because config is not valid at this time.'
fi
else
echo 1>&2 'Not restarting hercules-ci-agent-${name} despite config file update, because it is not already active.'
fi
'';
};
});
# Nix settings contributed by each enabled agent.
nix.settings = forAllAgents (_name: agent: {
  # A store path that was missing at first may well have finished building,
  # even shortly after the previous lookup. This *also* applies to the daemon.
  narinfo-cache-negative-ttl = 0;
  # Add the agent's user to Nix's trusted users.
  trusted-users = [ agent.user ];
});
# Trusted user allows simplified configuration and better performance
# when operating in a cluster.
# For every enabled agent whose user is trusted, refuse configurations that
# contain a `trusted-users = ...` line in `nix.extraOptions`.
assertions = forAllAgents (_name: agent:
  let
    userIsTrusted = agent.settings.nixUserIsTrusted or false;
    # Matches a `trusted-users = ...` assignment at the start of any line.
    trustedUsersLine = ".*(^|\n)[ \t]*trusted-users[ \t]*=.*";
    extraOptionsSetTrustedUsers =
      builtins.match trustedUsersLine config.nix.extraOptions != null;
  in
  [
    {
      assertion = userIsTrusted -> !extraOptionsSetTrustedUsers;
      message = ''
        hercules-ci-agent: Please do not set `trusted-users` in `nix.extraOptions`.
        The hercules-ci-agent module by default relies on `nix.settings.trusted-users`
        to be effectful, but a line like `trusted-users = ...` in `nix.extraOptions`
        will override the value set in `nix.settings.trusted-users`.
        Instead of setting `trusted-users` in the `nix.extraOptions` string, you should
        set an option with additive semantics, such as
        - the NixOS option `nix.settings.trusted-users`, or
        - the Nix option in the `extraOptions` string, `extra-trusted-users`
      '';
    }
  ]);
};
meta.maintainers = with lib.maintainers; [ roberth kranzes ];
}

View file

@ -0,0 +1,62 @@
{ config, lib, pkgs, ... }:
let
systemConfig = config;
in
{ config, name, ... }:
let
inherit (lib) types;
in
{
options = {
# Per-agent switch. The text below is rendered in the generated option
# documentation, so it is kept free of typos.
enable = lib.mkEnableOption (lib.mdDoc ''
  Hercules CI Agent as a system service.
  [Hercules CI](https://hercules-ci.com) is a
  continuous integration service that is centered around Nix.
  Support is available at [help@hercules-ci.com](mailto:help@hercules-ci.com).
'');
package = lib.mkPackageOptionMD pkgs "hercules-ci-agent" { };
# Account names default to `hci-<agent name>`. Both options are internal.
user = lib.mkOption {
  internal = true;
  description = lib.mdDoc "User account under which hercules-ci-agent runs.";
  default = "hci-${name}";
  type = types.str;
};
group = lib.mkOption {
  internal = true;
  description = lib.mdDoc "Group account under which hercules-ci-agent runs.";
  default = "hci-${name}";
  type = types.str;
};
# Settings for this agent, typed by the submodule in ./settings.nix.
# That submodule receives the system-wide configuration as `systemConfig`
# and this agent's own option values as `agent`.
settings = lib.mkOption {
  type = types.submodule (import ./settings.nix { inherit systemConfig lib name pkgs; agent = config; });
  default = { };
  description = lib.mdDoc ''
    These settings are written to the `agent.toml` file.
    Not all settings are listed as options, but they can be set nonetheless.
    For the exhaustive list of settings, see <https://docs.hercules-ci.com/hercules-ci/reference/agent-config/>.
  '';
};
# Internal option holding the path of the generated configuration file.
tomlFile = lib.mkOption {
  internal = true;
  description = lib.mdDoc ''
    The fully assembled config file.
  '';
  defaultText = lib.literalMD "generated `hercules-ci-agent-${name}.toml`";
  type = types.path;
};
};
# Render this agent's `settings` into a TOML file.
config =
  let
    toml = pkgs.formats.toml { };
  in
  {
    tomlFile = toml.generate "hercules-ci-agent-${name}.toml" config.settings;
  };
}

View file

@ -0,0 +1,163 @@
{ agent, systemConfig, lib, name, pkgs, ... }:
{ config, ... }:
let
inherit (lib) types;
format = pkgs.formats.toml { };
in
{
freeformType = format.type;
options = {
# Endpoint the agent talks to; defaults to the hosted service.
apiBaseUrl = lib.mkOption {
  type = types.str;
  default = "https://hercules-ci.com";
  description = lib.mdDoc ''
    API base URL that the agent will connect to.
    When using Hercules CI Enterprise, set this to the URL where your
    Hercules CI server is reachable.
  '';
};
# Root of the per-agent state; the work and secrets directories default to
# subdirectories of it.
baseDirectory = lib.mkOption {
  description = lib.mdDoc ''
    State directory (secrets, work directory, etc) for agent
  '';
  default = "/var/lib/hercules-ci-agent-${name}";
  type = types.path;
};
# Either a positive integer or the literal string "auto".
concurrentTasks = lib.mkOption {
  type = types.either types.ints.positive (types.enum [ "auto" ]);
  default = "auto";
  defaultText = lib.literalMD ''
    `"auto"`, meaning equal to the number of CPU cores.
  '';
  description = lib.mdDoc ''
    Number of tasks to perform simultaneously.
    A task is a single derivation build, an evaluation or an effect run.
    At minimum, you need 2 concurrent tasks for `x86_64-linux`
    in your cluster, to allow for import from derivation.
    `concurrentTasks` can be around the CPU core count or lower if memory is
    the bottleneck.
    The optimal value depends on the resource consumption characteristics of your workload,
    including memory usage and in-task parallelism. This is typically determined empirically.
    When scaling, it is generally better to have a double-size machine than two machines,
    because each split of resources causes inefficiencies; particularly with regards
    to build latency because of extra downloads.
  '';
};
# Free-form metadata attached to the agent. This option has no `default`;
# the module's `config.labels` definition supplies the `nixos.*` entries
# and merges with whatever the user adds. `defaultText` mirrors those
# entries so the rendered documentation matches what is actually set.
labels = lib.mkOption {
  description = lib.mdDoc ''
    A key-value map of user data.
    This data will be available to organization members in the dashboard and API.
    The values can be of any TOML type that corresponds to a JSON type, but arrays
    can not contain tables/objects due to limitations of the TOML library. Values
    involving arrays of non-primitive types may not be representable currently.
  '';
  type = format.type;
  defaultText = lib.literalExpression ''
    {
      nixos = {
        release = "...";
        codeName = "...";
        tags = [ ... ];
        configurationRevision = "..."; # only when not null
        label = "...";                 # only when not null
        systemName = "...";            # only when not null
      };
    }
  '';
};
# Read-only, internal: true when the agent's user appears in the system's
# `nix.settings.trusted-users`.
nixUserIsTrusted = lib.mkOption {
  type = types.bool;
  default =
    let
      trustedUsers = systemConfig.nix.settings.trusted-users;
    in
    lib.elem agent.user trustedUsers;
  description = lib.mdDoc ''
    Whether the agent's user should be considered trusted by Nix.
  '';
  readOnly = true;
  internal = true;
};
# Defaults to the `work` subdirectory of `baseDirectory`.
workDirectory = lib.mkOption {
  type = types.path;
  default = config.baseDirectory + "/work";
  defaultText = lib.literalExpression ''baseDirectory + "/work"'';
  description = lib.mdDoc ''
    The directory in which temporary subdirectories are created for task state. This includes sources for Nix evaluation.
  '';
};
# Defaults to the `secrets` subdirectory of `baseDirectory`.
staticSecretsDirectory = lib.mkOption {
  type = types.path;
  default = config.baseDirectory + "/secrets";
  defaultText = lib.literalExpression ''baseDirectory + "/secrets"'';
  description = lib.mdDoc ''
    This is the default directory to look for statically configured secrets like `cluster-join-token.key`.
    See also `clusterJoinTokenPath` and `binaryCachesPath` for fine-grained configuration.
  '';
};
# Defaults to `cluster-join-token.key` inside `staticSecretsDirectory`.
clusterJoinTokenPath = lib.mkOption {
  type = types.path;
  default = config.staticSecretsDirectory + "/cluster-join-token.key";
  defaultText = lib.literalExpression ''staticSecretsDirectory + "/cluster-join-token.key"'';
  description = lib.mdDoc ''
    Location of the cluster-join-token.key file.
    You can retrieve the contents of the file when creating a new agent via
    <https://hercules-ci.com/dashboard>.
    As this value is confidential, it should not be in the store, but
    installed using other means, such as agenix, NixOps
    `deployment.keys`, or manual installation.
    The contents of the file are used for authentication between the agent and the API.
  '';
};
# Defaults to `binary-caches.json` inside `staticSecretsDirectory`.
binaryCachesPath = lib.mkOption {
  type = types.path;
  default = config.staticSecretsDirectory + "/binary-caches.json";
  defaultText = lib.literalExpression ''staticSecretsDirectory + "/binary-caches.json"'';
  description = lib.mdDoc ''
    Path to a JSON file containing binary cache secret keys.
    As these values are confidential, they should not be in the store, but
    copied over using other means, such as agenix, NixOps
    `deployment.keys`, or manual installation.
    The format is described on <https://docs.hercules-ci.com/hercules-ci-agent/binary-caches-json/>.
  '';
};
# Defaults to `secrets.json` inside `staticSecretsDirectory`.
secretsJsonPath = lib.mkOption {
  type = types.path;
  default = config.staticSecretsDirectory + "/secrets.json";
  defaultText = lib.literalExpression ''staticSecretsDirectory + "/secrets.json"'';
  description = lib.mdDoc ''
    Path to a JSON file containing secrets for effects.
    As these values are confidential, they should not be in the store, but
    copied over using other means, such as agenix, NixOps
    `deployment.keys`, or manual installation.
    The format is described on <https://docs.hercules-ci.com/hercules-ci-agent/secrets-json/>.
  '';
};
};
# Labels describing the NixOS system the agent runs on, taken from the
# system-wide configuration. Entries whose source value is null are omitted.
config = {
  labels.nixos =
    let
      nixosInfo = systemConfig.system.nixos;
      # Define the value only when it is not null.
      unlessNull = value: lib.mkIf (value != null) value;
    in
    {
      release = nixosInfo.release;
      codeName = nixosInfo.codeName;
      tags = nixosInfo.tags;
      label = unlessNull nixosInfo.label;
      configurationRevision = unlessNull systemConfig.system.configurationRevision;
      systemName = unlessNull systemConfig.system.name;
    };
};
}

View file

@ -2,6 +2,7 @@
{
services.hercules-ci-agents.max = {
enable = true;
settings = {
clusterJoinTokenPath = config.age.secrets.hci-token-max.path;
binaryCachesPath = config.age.secrets.hci-cache-config-max.path;

View file

@ -2,6 +2,7 @@
{
services.hercules-ci-agents.nixpak = {
enable = true;
settings = {
clusterJoinTokenPath = config.age.secrets.hci-token-nixpak.path;
binaryCachesPath = config.age.secrets.hci-cache-config-nixpak.path;

View file

@ -7,6 +7,7 @@
group = "hci-private-void";
};
services.hercules-ci-agents.private-void = {
enable = true;
settings = {
clusterJoinTokenPath = config.age.secrets.hci-token-private-void.path;
binaryCachesPath = config.age.secrets.hci-cache-config-private-void.path;