blob: bff5aa84752bae29ad48897e86c63371998a5661 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# NixOS module fragment: Prometheus alerting rules.
#
# Every rule group below is declared as plain Nix data, serialised with
# builtins.toJSON, and handed to services.prometheus.rules as a single
# list element.
#
# The self/config/lib/pkgs arguments are not referenced in the body; they
# are kept so that the module's call signature stays unchanged.
{ self, config, lib, pkgs, ... }:
{
  services.prometheus = {
    rules = [(builtins.toJSON {
      groups = [
        # Disk health (smartctl metrics) and ZFS pool capacity.
        { name = "disk";
          rules = [
            # No `for` on the first two rules: they have no hold period.
            { alert = "smartctl self-test failed to pass";
              expr = "smartctl_device_smart_status != 1";
            }
            { alert = "smartctl uncorrectable errors";
              expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Offline_Uncorrectable"} > 10'';
            }
            { alert = "smartctl sectors pending";
              expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Current_Pending_Sector"} > 10'';
              for = "10m";
            }
            # Allocated bytes as a rounded percentage of pool size.
            { alert = "ZFS Pool over 90% full";
              expr = "round((zfs_pool_allocated_bytes / zfs_pool_size_bytes) * 100) > 90";
              for = "10m";
            }
          ];
        }

        # Host-level CPU load and memory pressure (node metrics).
        { name = "Machine Resources";
          rules = [
            # node_load5 divided by the number of idle-mode CPU series,
            # as a percentage rounded to the nearest 0.1.
            { alert = "CPU Load over 100%";
              expr = ''round((node_load5 / count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) * 100, 0.1) > 100'';
              for = "10m";
            }
            # Used memory = total - free - buffers - cached, as a percentage
            # of total. The content lines of the indented string share one
            # indentation level on purpose: Nix strips the common prefix, so
            # the resulting string carries no leading whitespace.
            { alert = "Memory usage over 90%";
              expr = ''
                round(
                (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes)
                / node_memory_MemTotal_bytes * 100
                , 0.1) > 90
              '';
              for = "10m";
            }
          ];
        }

        # Any systemd unit reported in the "failed" state.
        { name = "systemd";
          rules = [
            { alert = "systemd unit failed";
              expr = ''node_systemd_unit_state{state="failed"} >= 1'';
              for = "5m";
            }
          ];
        }

        { name = "Authentik";
          rules = [
            { alert = "authentik outpost down";
              expr = ''authentik_outpost_connection != 1'';
              for = "5m";
            }
          ];
        }

        # Exactly one "nginx down" rule: a second identical copy would only
        # evaluate the same expression twice under the same alert name.
        { name = "nginx";
          rules = [
            { alert = "nginx down";
              expr = ''nginx_up != 1'';
              for = "5m";
            }
          ];
        }

        # `or` lets the rule match whichever of the two TPS metric names
        # is present.
        { name = "minecraft";
          rules = [
            { alert = "minecraft tps low";
              expr = ''(mc_tps or minecraft_tps) < 16'';
              for = "5m";
            }
          ];
        }

        # PowerDNS authoritative server latency and overload drops.
        { name = "PowerDNS";
          rules = [
            { alert = "pdns latency high";
              expr = ''pdns_auth_latency > 350'';
              for = "5m";
            }
            { alert = "pdns send latency high";
              expr = ''pdns_auth_send_latency > 85'';
              for = "5m";
            }
            { alert = "pdns backend overloaded";
              expr = ''pdns_auth_overload_drops > 10'';
              for = "5m";
            }
          ];
        }
      ];
    })];
  };
}
|