cluster/services/monitoring: make grafana highly available
This commit is contained in:
parent
1ca0e7f454
commit
5bd83ec5c1
3 changed files with 165 additions and 87 deletions
|
@ -1,9 +1,11 @@
|
|||
{ config, ... }:
|
||||
{ config, lib, ... }:
|
||||
|
||||
let
|
||||
# Returns the first node name listed under services.monitoring.nodes for the given node type.
nodeFor = nodeType: builtins.head config.services.monitoring.nodes.${nodeType};
|
||||
|
||||
# Mesh IP of the first node of the given node type (resolved via nodeFor), read from config.vars.mesh.
meshIpFor = nodeType: config.vars.mesh.${nodeFor nodeType}.meshIp;
|
||||
|
||||
# Mesh IP of one specific node, looked up by node name in config.vars.mesh.
meshIpForNode = name: config.vars.mesh.${name}.meshIp;
|
||||
in
|
||||
|
||||
{
|
||||
|
@ -37,16 +39,24 @@ in
|
|||
ipv4 = meshIpFor "server";
|
||||
};
|
||||
};
|
||||
# One attribute per node listed in services.monitoring.nodes.grafana (keyed by node name).
# Each value declares a "grafana" link that uses HTTP on that node's own mesh IP.
hostLinks = lib.genAttrs config.services.monitoring.nodes.grafana (name: {
|
||||
grafana = {
|
||||
protocol = "http";
|
||||
ipv4 = meshIpForNode name;
|
||||
};
|
||||
});
|
||||
services.monitoring = {
|
||||
nodes = {
|
||||
client = [ "checkmate" "thunderskin" "VEGAS" "prophet" ];
|
||||
blackbox = [ "checkmate" "VEGAS" "prophet" ];
|
||||
grafana = [ "VEGAS" "prophet" ];
|
||||
logging = [ "VEGAS" ];
|
||||
server = [ "VEGAS" ];
|
||||
};
|
||||
nixos = {
|
||||
client = ./client.nix;
|
||||
blackbox = ./blackbox.nix;
|
||||
grafana = ./grafana-ha.nix;
|
||||
logging = ./logging.nix;
|
||||
server = [
|
||||
./server.nix
|
||||
|
|
152
cluster/services/monitoring/grafana-ha.nix
Normal file
152
cluster/services/monitoring/grafana-ha.nix
Normal file
|
@ -0,0 +1,152 @@
|
|||
{ cluster, config, depot, lib, pkgs, tools, ... }:
|
||||
let
|
||||
inherit (tools.meta) domain;
|
||||
|
||||
inherit (cluster.config.links) loki-ingest prometheus-ingest;
|
||||
|
||||
inherit (cluster.config) hostLinks;
|
||||
|
||||
inherit (config.networking) hostName;
|
||||
|
||||
svc = cluster.config.services.monitoring;
|
||||
|
||||
# Joins a list of strings with single spaces; used for the OAuth scopes and the feature toggle list.
iniList = lib.concatStringsSep " ";
|
||||
|
||||
# Builds the URL of the OpenID Connect endpoint named x (e.g. "auth", "token", "userinfo")
# under the "master" realm on the login.<domain> host.
login = x: "https://login.${domain}/auth/realms/master/protocol/openid-connect/${x}";
|
||||
in
|
||||
{
|
||||
# Encrypted secrets consumed by this file.
age.secrets = {
|
||||
# Referenced by the Grafana database password setting; owned by the "grafana" user.
grafana-db-credentials = {
|
||||
file = ./secrets/grafana-db-credentials.age;
|
||||
owner = "grafana";
|
||||
};
|
||||
# Passed to the Grafana systemd unit as its EnvironmentFile.
grafana-secrets.file = ./secrets/grafana-secrets.age;
|
||||
};
|
||||
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
package = depot.packages.grafana;
|
||||
dataDir = "/srv/storage/private/grafana";
|
||||
settings = {
|
||||
# HTTP server settings. The listen address and port come from this host's own
# entry in the cluster-level hostLinks, selected by networking.hostName.
server = {
|
||||
root_url = "https://monitoring.${domain}/";
|
||||
http_addr = hostLinks.${hostName}.grafana.ipv4;
|
||||
http_port = hostLinks.${hostName}.grafana.port;
|
||||
};
|
||||
# PostgreSQL backend, addressed through the cluster-level patroni-pg-access link.
database = {
|
||||
type = "postgres";
|
||||
host = cluster.config.links.patroni-pg-access.tuple;
|
||||
user = "grafana";
|
||||
# Only the path of the decrypted age secret is written here; the $__file{...} form
# is Grafana's file-expansion syntax (see the Grafana configuration docs).
password = "$__file{${config.age.secrets.grafana-db-credentials.path}}";
|
||||
};
|
||||
analytics.reporting_enabled = false;
|
||||
"auth.generic_oauth" = {
|
||||
enabled = true;
|
||||
allow_sign_up = true;
|
||||
client_id = "net.privatevoid.monitoring1";
|
||||
auth_url = login "auth";
|
||||
token_url = login "token";
|
||||
api_url = login "userinfo";
|
||||
scopes = iniList [ "openid" "profile" "email" "roles" ];
|
||||
role_attribute_strict = true;
|
||||
role_attribute_path = "resource_access.monitoring.roles[0]";
|
||||
};
|
||||
security = {
|
||||
cookie_secure = true;
|
||||
disable_gravatar = true;
|
||||
};
|
||||
feature_toggles.enable = iniList [
|
||||
"tempoSearch"
|
||||
"tempoBackendSearch"
|
||||
"tempoServiceGraph"
|
||||
];
|
||||
};
|
||||
provision = {
|
||||
enable = true;
|
||||
datasources.settings.datasources = [
|
||||
{
|
||||
name = "Prometheus";
|
||||
uid = "PBFA97CFB590B2093";
|
||||
inherit (prometheus-ingest) url;
|
||||
type = "prometheus";
|
||||
isDefault = true;
|
||||
}
|
||||
{
|
||||
name = "Loki";
|
||||
uid = "P8E80F9AEF21F6940";
|
||||
inherit (loki-ingest) url;
|
||||
type = "loki";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services = {
|
||||
# The stock grafana unit is disabled; its definition is still evaluated because
# grafana-ha reuses it as `base`, so the EnvironmentFile set here is carried over
# through base.serviceConfig.
grafana = {
|
||||
enable = false;
|
||||
serviceConfig.EnvironmentFile = config.age.secrets.grafana-secrets.path;
|
||||
};
|
||||
# Replacement unit for Grafana: it reuses the stock unit's configuration but runs
# Grafana's original ExecStart from a wrapper script started under `consul lock`.
grafana-ha = let
|
||||
# Definition of the (disabled) stock grafana unit, used as the template below.
base = config.systemd.services.grafana;
|
||||
inherit (config.services) consul;
|
||||
# NOTE: this local `svc` is the Consul service entry for grafana and shadows the
# file-level `svc` (cluster.config.services.monitoring) inside this let block.
svc = config.consul.services.grafana;
|
||||
# Wrapper script: installs an EXIT trap that runs the deregister command, runs the
# register command, then runs the stock ExecStart as a child (not exec'd, so the
# trap still fires when it exits).
run = pkgs.writeShellScript "grafana-ha-start" ''
|
||||
trap '${svc.commands.deregister}' EXIT
|
||||
${svc.commands.register}
|
||||
${base.serviceConfig.ExecStart}
|
||||
'';
|
||||
in {
|
||||
inherit (base) wantedBy;
|
||||
description = "Grafana | High Availability";
|
||||
# Exposes this unit under the name of the disabled stock unit as well.
aliases = [ "grafana.service" ];
|
||||
|
||||
# Same ordering as the stock unit, plus a hard dependency on the Consul agent.
after = base.after ++ [ "consul.service" ];
|
||||
requires = [ "consul.service" ];
|
||||
|
||||
# Stock serviceConfig with ExecStart and RestrictAddressFamilies overridden.
serviceConfig = base.serviceConfig // {
|
||||
# Runs the wrapper under `consul lock` on the services/grafana prefix, without an
# intermediate shell. Presumably this limits Grafana to one running instance across
# the grafana nodes at a time — confirm against the `consul lock` defaults.
ExecStart = "${consul.package}/bin/consul lock --shell=false services/grafana ${run}";
|
||||
# consul uses AF_NETLINK to determine interface addresses, even when just registering a service
|
||||
RestrictAddressFamilies = base.serviceConfig.RestrictAddressFamilies ++ [ "AF_NETLINK" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Reverse proxy for monitoring.<domain>, backed by an upstream group of all grafana nodes.
services.nginx = {
|
||||
# Selects the hostLinks entries of the nodes in svc.nodes.grafana and turns each into
# an upstream server keyed by that node's grafana link tuple, with no per-server options.
upstreams.grafana-ha.servers = lib.mapAttrs' (_: links: lib.nameValuePair links.grafana.tuple {}) (lib.getAttrs (svc.nodes.grafana) hostLinks);
|
||||
|
||||
# Base vhost comes from the project helper tools.nginx.vhosts.proxy (not shown here),
# pointed at the upstream group above; WebSocket proxying is enabled for "/".
virtualHosts."monitoring.${domain}" = lib.recursiveUpdate (tools.nginx.vhosts.proxy "http://grafana-ha") {
|
||||
locations."/".proxyWebsockets = true;
|
||||
};
|
||||
};
|
||||
|
||||
# Certificate for monitoring.<domain>: sets the "pdns" DNS provider and forces webroot to null.
# NOTE(review): presumably switches this cert from webroot to DNS validation so every
# grafana node can obtain it — confirm against the ACME module defaults used by the vhost helper.
security.acme.certs."monitoring.${domain}" = {
|
||||
dnsProvider = "pdns";
|
||||
webroot = lib.mkForce null;
|
||||
};
|
||||
|
||||
# Consul service entry for Grafana. Its register/deregister commands are invoked from the
# grafana-ha start script; `mode = "manual"` presumably disables automatic registration
# by the project's consul.services module — confirm against that module.
consul.services.grafana = {
|
||||
mode = "manual";
|
||||
unit = "grafana-ha";
|
||||
# `rec` lets the Frontend check below refer to `address`.
definition = rec {
|
||||
name = "grafana";
|
||||
address = depot.reflection.interfaces.primary.addrPublic;
|
||||
port = 443;
|
||||
checks = [
|
||||
# Every 30s: HEAD request to https://<address>, with the TLS server name set to the
# monitoring vhost.
{
|
||||
name = "Frontend";
|
||||
id = "service:grafana:frontend";
|
||||
interval = "30s";
|
||||
http = "https://${address}";
|
||||
tls_server_name = "monitoring.${domain}";
|
||||
method = "HEAD";
|
||||
}
|
||||
# Every 5s: request to /healthz on this host's own grafana link URL.
{
|
||||
name = "Backend";
|
||||
id = "service:grafana:backend";
|
||||
interval = "5s";
|
||||
http = "${hostLinks.${hostName}.grafana.url}/healthz";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
}
|
|
@ -1,92 +1,8 @@
|
|||
{ cluster, config, depot, lib, tools, ... }:
|
||||
{ cluster, ... }:
|
||||
let
|
||||
inherit (tools.meta) domain;
|
||||
|
||||
inherit (config) links;
|
||||
|
||||
inherit (cluster.config.links) loki-ingest prometheus-ingest;
|
||||
|
||||
iniList = lib.concatStringsSep " ";
|
||||
|
||||
login = x: "https://login.${domain}/auth/realms/master/protocol/openid-connect/${x}";
|
||||
inherit (cluster.config.links) prometheus-ingest;
|
||||
in
|
||||
{
|
||||
age.secrets = {
|
||||
grafana-db-credentials = {
|
||||
file = ./secrets/grafana-db-credentials.age;
|
||||
owner = "grafana";
|
||||
};
|
||||
grafana-secrets.file = ./secrets/grafana-secrets.age;
|
||||
};
|
||||
|
||||
links = {
|
||||
grafana.protocol = "http";
|
||||
};
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
package = depot.packages.grafana;
|
||||
dataDir = "/srv/storage/private/grafana";
|
||||
settings = {
|
||||
server = {
|
||||
root_url = "https://monitoring.${domain}/";
|
||||
http_port = links.grafana.port;
|
||||
};
|
||||
database = {
|
||||
type = "postgres";
|
||||
host = cluster.config.links.patroni-pg-access.tuple;
|
||||
user = "grafana";
|
||||
password = "$__file{${config.age.secrets.grafana-db-credentials.path}}";
|
||||
};
|
||||
analytics.reporting_enabled = false;
|
||||
"auth.generic_oauth" = {
|
||||
enabled = true;
|
||||
allow_sign_up = true;
|
||||
client_id = "net.privatevoid.monitoring1";
|
||||
auth_url = login "auth";
|
||||
token_url = login "token";
|
||||
api_url = login "userinfo";
|
||||
scopes = iniList [ "openid" "profile" "email" "roles" ];
|
||||
role_attribute_strict = true;
|
||||
role_attribute_path = "resource_access.monitoring.roles[0]";
|
||||
};
|
||||
security = {
|
||||
cookie_secure = true;
|
||||
disable_gravatar = true;
|
||||
};
|
||||
feature_toggles.enable = iniList [
|
||||
"tempoSearch"
|
||||
"tempoBackendSearch"
|
||||
"tempoServiceGraph"
|
||||
];
|
||||
};
|
||||
provision = {
|
||||
enable = true;
|
||||
datasources.settings.datasources = [
|
||||
{
|
||||
name = "Prometheus";
|
||||
uid = "PBFA97CFB590B2093";
|
||||
inherit (prometheus-ingest) url;
|
||||
type = "prometheus";
|
||||
isDefault = true;
|
||||
}
|
||||
{
|
||||
name = "Loki";
|
||||
uid = "P8E80F9AEF21F6940";
|
||||
inherit (loki-ingest) url;
|
||||
type = "loki";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.grafana.serviceConfig = {
|
||||
EnvironmentFile = config.age.secrets.grafana-secrets.path;
|
||||
};
|
||||
|
||||
services.nginx.virtualHosts."monitoring.${domain}" = lib.recursiveUpdate (tools.nginx.vhosts.proxy links.grafana.url) {
|
||||
locations."/".proxyWebsockets = true;
|
||||
};
|
||||
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
listenAddress = prometheus-ingest.ipv4;
|
||||
|
|
Loading…
Reference in a new issue