aboutsummaryrefslogtreecommitdiff
path: root/modules/monitoring/rules.nix
diff options
context:
space:
mode:
author: Max Audron <audron@cocaine.farm> 2025-08-05 14:53:56 +0200
committer: Max Audron <audron@cocaine.farm> 2025-08-05 14:53:56 +0200
commit: 482e058f57ff14f6293e9011fa43f5e9db3723fc (patch)
tree: 899366cad885726e75bcd097cd1c3e979caed843 /modules/monitoring/rules.nix
parent: add homepage dashboard (diff)
add prometheus alerting rules
Diffstat (limited to '')
-rw-r--r--modules/monitoring/rules.nix97
1 files changed, 97 insertions, 0 deletions
diff --git a/modules/monitoring/rules.nix b/modules/monitoring/rules.nix
new file mode 100644
index 0000000..bff5aa8
--- /dev/null
+++ b/modules/monitoring/rules.nix
@@ -0,0 +1,97 @@
+{ self, config, lib, pkgs, ... }:
+
+{
+ services.prometheus = {
+ rules = [(builtins.toJSON {
+ groups = [
+ # Disk health: SMART status / attribute thresholds and ZFS pool capacity.
+ {
+   name = "disk";
+   rules = [
+     # Fires whenever smartctl_device_smart_status is anything other than 1.
+     # No `for` is set, so there is no hold period before the alert fires.
+     {
+       alert = "smartctl self-test failed to pass";
+       expr = "smartctl_device_smart_status != 1";
+     }
+
+     # Raw Offline_Uncorrectable attribute value above 10 (no hold period).
+     {
+       alert = "smartctl uncorrectable errors";
+       expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Offline_Uncorrectable"} > 10'';
+     }
+
+     # Raw Current_Pending_Sector attribute value above 10 for 10 minutes.
+     {
+       alert = "smartctl sectors pending";
+       for = "10m";
+       expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Current_Pending_Sector"} > 10'';
+     }
+
+     # Allocated / total pool bytes as a percentage, rounded to the nearest
+     # whole number, above 90 for 10 minutes.
+     {
+       alert = "ZFS Pool over 90% full";
+       for = "10m";
+       expr = "round((zfs_pool_allocated_bytes / zfs_pool_size_bytes) * 100) > 90";
+     }
+   ];
+ }
+ # Host-level CPU load and memory pressure alerts.
+ {
+   name = "Machine Resources";
+   rules = [
+     # 5-minute load average divided by the number of idle-mode CPU series
+     # (one per CPU once `cpu` and `mode` are aggregated away), expressed as
+     # a percentage rounded to 0.1; must stay above 100 for 10 minutes.
+     {
+       alert = "CPU Load over 100%";
+       for = "10m";
+       expr = ''round((node_load5 / count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) * 100, 0.1) > 100'';
+     }
+
+     # Used memory = MemTotal - MemFree - Buffers - Cached, as a percentage
+     # of MemTotal rounded to 0.1; must stay above 90 for 10 minutes.
+     # Every line of the indented string shares the same indentation so the
+     # evaluated expression text has no leading whitespace.
+     {
+       alert = "Memory usage over 90%";
+       for = "10m";
+       expr = ''
+         round(
+         (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes)
+         / node_memory_MemTotal_bytes * 100
+         , 0.1) > 90
+       '';
+     }
+   ];
+ }
+ # systemd unit health.
+ {
+   name = "systemd";
+   rules = [
+     # Any series with state="failed" reporting a value of 1 or more for
+     # 5 minutes.
+     {
+       alert = "systemd unit failed";
+       for = "5m";
+       expr = ''node_systemd_unit_state{state="failed"} >= 1'';
+     }
+   ];
+ }
+ # Authentik outpost connectivity.
+ {
+   name = "Authentik";
+   rules = [
+     # authentik_outpost_connection differing from 1 for 5 minutes.
+     {
+       alert = "authentik outpost down";
+       for = "5m";
+       expr = "authentik_outpost_connection != 1";
+     }
+   ];
+ }
+ # nginx availability.
+ { name = "nginx";
+   rules = [
+     # nginx_up differing from 1 for 5 minutes. The rule is declared exactly
+     # once: a second entry with the same alert name and expression adds no
+     # coverage and is reported by promtool as a duplicate rule.
+     { alert = "nginx down";
+       expr = ''nginx_up != 1'';
+       for = "5m";
+     }
+   ];
+ }
+ # Minecraft server tick rate.
+ {
+   name = "minecraft";
+   rules = [
+     # Uses mc_tps, falling back to minecraft_tps for label sets that have no
+     # mc_tps series (`or`); fires when the value stays below 16 for 5 minutes.
+     {
+       alert = "minecraft tps low";
+       for = "5m";
+       expr = "(mc_tps or minecraft_tps) < 16";
+     }
+   ];
+ }
+ # PowerDNS authoritative server latency and backend overload alerts.
+ # All three rules require the condition to hold for 5 minutes.
+ {
+   name = "PowerDNS";
+   rules = [
+     # pdns_auth_latency above 350.
+     # NOTE(review): unit is not visible here (presumably microseconds) — confirm.
+     {
+       alert = "pdns latency high";
+       for = "5m";
+       expr = "pdns_auth_latency > 350";
+     }
+
+     # pdns_auth_send_latency above 85 (same unit caveat as above).
+     {
+       alert = "pdns send latency high";
+       for = "5m";
+       expr = "pdns_auth_send_latency > 85";
+     }
+
+     # pdns_auth_overload_drops above 10.
+     # NOTE(review): if this metric is a monotonically increasing counter, a
+     # raw threshold stays true once crossed until the process restarts;
+     # rate()/increase() may be intended — confirm the metric type.
+     {
+       alert = "pdns backend overloaded";
+       for = "5m";
+       expr = "pdns_auth_overload_drops > 10";
+     }
+   ];
+ }
+ ];
+ })];
+ };
+}