cluster/services/monitoring: make grafana highly available

This commit is contained in:
Max Headroom 2023-06-03 00:33:51 +02:00
parent 1ca0e7f454
commit 5bd83ec5c1
3 changed files with 165 additions and 87 deletions

View file

@ -1,9 +1,11 @@
{ config, ... }:
{ config, lib, ... }:
let
nodeFor = nodeType: builtins.head config.services.monitoring.nodes.${nodeType};
meshIpFor = nodeType: config.vars.mesh.${nodeFor nodeType}.meshIp;
meshIpForNode = name: config.vars.mesh.${name}.meshIp;
in
{
@ -37,16 +39,24 @@ in
ipv4 = meshIpFor "server";
};
};
hostLinks = lib.genAttrs config.services.monitoring.nodes.grafana (name: {
grafana = {
protocol = "http";
ipv4 = meshIpForNode name;
};
});
services.monitoring = {
nodes = {
client = [ "checkmate" "thunderskin" "VEGAS" "prophet" ];
blackbox = [ "checkmate" "VEGAS" "prophet" ];
grafana = [ "VEGAS" "prophet" ];
logging = [ "VEGAS" ];
server = [ "VEGAS" ];
};
nixos = {
client = ./client.nix;
blackbox = ./blackbox.nix;
grafana = ./grafana-ha.nix;
logging = ./logging.nix;
server = [
./server.nix

View file

@ -0,0 +1,152 @@
{ cluster, config, depot, lib, pkgs, tools, ... }:
let
inherit (tools.meta) domain;
inherit (cluster.config.links) loki-ingest prometheus-ingest;
inherit (cluster.config) hostLinks;
inherit (config.networking) hostName;
svc = cluster.config.services.monitoring;
iniList = lib.concatStringsSep " ";
login = x: "https://login.${domain}/auth/realms/master/protocol/openid-connect/${x}";
in
{
age.secrets = {
grafana-db-credentials = {
file = ./secrets/grafana-db-credentials.age;
owner = "grafana";
};
grafana-secrets.file = ./secrets/grafana-secrets.age;
};
services.grafana = {
enable = true;
package = depot.packages.grafana;
dataDir = "/srv/storage/private/grafana";
settings = {
server = {
root_url = "https://monitoring.${domain}/";
http_addr = hostLinks.${hostName}.grafana.ipv4;
http_port = hostLinks.${hostName}.grafana.port;
};
database = {
type = "postgres";
host = cluster.config.links.patroni-pg-access.tuple;
user = "grafana";
password = "$__file{${config.age.secrets.grafana-db-credentials.path}}";
};
analytics.reporting_enabled = false;
"auth.generic_oauth" = {
enabled = true;
allow_sign_up = true;
client_id = "net.privatevoid.monitoring1";
auth_url = login "auth";
token_url = login "token";
api_url = login "userinfo";
scopes = iniList [ "openid" "profile" "email" "roles" ];
role_attribute_strict = true;
role_attribute_path = "resource_access.monitoring.roles[0]";
};
security = {
cookie_secure = true;
disable_gravatar = true;
};
feature_toggles.enable = iniList [
"tempoSearch"
"tempoBackendSearch"
"tempoServiceGraph"
];
};
provision = {
enable = true;
datasources.settings.datasources = [
{
name = "Prometheus";
uid = "PBFA97CFB590B2093";
inherit (prometheus-ingest) url;
type = "prometheus";
isDefault = true;
}
{
name = "Loki";
uid = "P8E80F9AEF21F6940";
inherit (loki-ingest) url;
type = "loki";
}
];
};
};
systemd.services = {
grafana = {
enable = false;
serviceConfig.EnvironmentFile = config.age.secrets.grafana-secrets.path;
};
grafana-ha = let
base = config.systemd.services.grafana;
inherit (config.services) consul;
svc = config.consul.services.grafana;
run = pkgs.writeShellScript "grafana-ha-start" ''
trap '${svc.commands.deregister}' EXIT
${svc.commands.register}
${base.serviceConfig.ExecStart}
'';
in {
inherit (base) wantedBy;
description = "Grafana | High Availability";
aliases = [ "grafana.service" ];
after = base.after ++ [ "consul.service" ];
requires = [ "consul.service" ];
serviceConfig = base.serviceConfig // {
ExecStart = "${consul.package}/bin/consul lock --shell=false services/grafana ${run}";
# consul uses AF_NETLINK to determine interface addresses, even when just registering a service
RestrictAddressFamilies = base.serviceConfig.RestrictAddressFamilies ++ [ "AF_NETLINK" ];
};
};
};
services.nginx = {
upstreams.grafana-ha.servers = lib.mapAttrs' (_: links: lib.nameValuePair links.grafana.tuple {}) (lib.getAttrs (svc.nodes.grafana) hostLinks);
virtualHosts."monitoring.${domain}" = lib.recursiveUpdate (tools.nginx.vhosts.proxy "http://grafana-ha") {
locations."/".proxyWebsockets = true;
};
};
security.acme.certs."monitoring.${domain}" = {
dnsProvider = "pdns";
webroot = lib.mkForce null;
};
consul.services.grafana = {
mode = "manual";
unit = "grafana-ha";
definition = rec {
name = "grafana";
address = depot.reflection.interfaces.primary.addrPublic;
port = 443;
checks = [
{
name = "Frontend";
id = "service:grafana:frontend";
interval = "30s";
http = "https://${address}";
tls_server_name = "monitoring.${domain}";
method = "HEAD";
}
{
name = "Backend";
id = "service:grafana:backend";
interval = "5s";
http = "${hostLinks.${hostName}.grafana.url}/healthz";
}
];
};
};
}

View file

@ -1,92 +1,8 @@
{ cluster, config, depot, lib, tools, ... }:
{ cluster, ... }:
let
inherit (tools.meta) domain;
inherit (config) links;
inherit (cluster.config.links) loki-ingest prometheus-ingest;
iniList = lib.concatStringsSep " ";
login = x: "https://login.${domain}/auth/realms/master/protocol/openid-connect/${x}";
inherit (cluster.config.links) prometheus-ingest;
in
{
age.secrets = {
grafana-db-credentials = {
file = ./secrets/grafana-db-credentials.age;
owner = "grafana";
};
grafana-secrets.file = ./secrets/grafana-secrets.age;
};
links = {
grafana.protocol = "http";
};
services.grafana = {
enable = true;
package = depot.packages.grafana;
dataDir = "/srv/storage/private/grafana";
settings = {
server = {
root_url = "https://monitoring.${domain}/";
http_port = links.grafana.port;
};
database = {
type = "postgres";
host = cluster.config.links.patroni-pg-access.tuple;
user = "grafana";
password = "$__file{${config.age.secrets.grafana-db-credentials.path}}";
};
analytics.reporting_enabled = false;
"auth.generic_oauth" = {
enabled = true;
allow_sign_up = true;
client_id = "net.privatevoid.monitoring1";
auth_url = login "auth";
token_url = login "token";
api_url = login "userinfo";
scopes = iniList [ "openid" "profile" "email" "roles" ];
role_attribute_strict = true;
role_attribute_path = "resource_access.monitoring.roles[0]";
};
security = {
cookie_secure = true;
disable_gravatar = true;
};
feature_toggles.enable = iniList [
"tempoSearch"
"tempoBackendSearch"
"tempoServiceGraph"
];
};
provision = {
enable = true;
datasources.settings.datasources = [
{
name = "Prometheus";
uid = "PBFA97CFB590B2093";
inherit (prometheus-ingest) url;
type = "prometheus";
isDefault = true;
}
{
name = "Loki";
uid = "P8E80F9AEF21F6940";
inherit (loki-ingest) url;
type = "loki";
}
];
};
};
systemd.services.grafana.serviceConfig = {
EnvironmentFile = config.age.secrets.grafana-secrets.path;
};
services.nginx.virtualHosts."monitoring.${domain}" = lib.recursiveUpdate (tools.nginx.vhosts.proxy links.grafana.url) {
locations."/".proxyWebsockets = true;
};
services.prometheus = {
enable = true;
listenAddress = prometheus-ingest.ipv4;