# NixOS module that defines the Prometheus alerting rules.
#
# The rule groups are written as a Nix attribute set and serialised with
# builtins.toJSON into a single rules document, which is passed as the only
# element of `services.prometheus.rules`.
#
# Each rule has:
#   alert - the alert name
#   expr  - the PromQL expression; the alert is active while it returns samples
#   for   - (optional) how long the expression must keep matching before the
#           alert fires; rules without `for` fire on the first matching
#           evaluation
{ self, config, lib, pkgs, ... }:
{
  services.prometheus = {
    rules = [
      (builtins.toJSON {
        groups = [
          # Disk health (SMART attributes) and ZFS pool capacity.
          {
            name = "disk";
            rules = [
              {
                alert = "smartctl self-test failed to pass";
                expr = "smartctl_device_smart_status != 1";
              }
              {
                alert = "smartctl uncorrectable errors";
                expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Offline_Uncorrectable"} > 10'';
              }
              {
                alert = "smartctl sectors pending";
                expr = ''smartctl_device_attribute{attribute_value_type="raw", attribute_name="Current_Pending_Sector"} > 10'';
                for = "10m";
              }
              {
                alert = "ZFS Pool over 90% full";
                expr = "round((zfs_pool_allocated_bytes / zfs_pool_size_bytes) * 100) > 90";
                for = "10m";
              }
            ];
          }

          # CPU load and memory pressure.
          {
            name = "Machine Resources";
            rules = [
              {
                # 5-minute load average divided by the number of
                # node_cpu_seconds_total{mode="idle"} series (one per CPU),
                # expressed as a percentage.
                alert = "CPU Load over 100%";
                expr = ''round((node_load5 / count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) * 100, 0.1) > 100'';
                for = "10m";
              }
              {
                # Used memory = total - free - buffers - cached, as a
                # percentage of total. The expression is kept on one line so
                # the generated string is unchanged.
                alert = "Memory usage over 90%";
                expr = '' round( (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100 , 0.1) > 90 '';
                for = "10m";
              }
            ];
          }

          # systemd units reported in the "failed" state.
          {
            name = "systemd";
            rules = [
              {
                alert = "systemd unit failed";
                expr = ''node_systemd_unit_state{state="failed"} >= 1'';
                for = "5m";
              }
            ];
          }

          {
            name = "Authentik";
            rules = [
              {
                alert = "authentik outpost down";
                expr = ''authentik_outpost_connection != 1'';
                for = "5m";
              }
            ];
          }

          # The "nginx down" rule used to be listed twice with identical
          # name, expression and duration; only one copy is kept.
          {
            name = "nginx";
            rules = [
              {
                alert = "nginx down";
                expr = ''nginx_up != 1'';
                for = "5m";
              }
            ];
          }

          {
            name = "minecraft";
            rules = [
              {
                # Uses mc_tps where present, otherwise minecraft_tps.
                alert = "minecraft tps low";
                expr = ''(mc_tps or minecraft_tps) < 16'';
                for = "5m";
              }
            ];
          }

          {
            name = "PowerDNS";
            rules = [
              {
                alert = "pdns latency high";
                expr = ''pdns_auth_latency > 350'';
                for = "5m";
              }
              {
                alert = "pdns send latency high";
                expr = ''pdns_auth_send_latency > 85'';
                for = "5m";
              }
              {
                alert = "pdns backend overloaded";
                expr = ''pdns_auth_overload_drops > 10'';
                for = "5m";
              }
            ];
          }
        ];
      })
    ];
  };
}