Alerts


/etc/prometheus/alert.rules > containers
caddy_down (1 active)
alert: caddy_down
expr: absent((time() - container_last_seen{name="caddy"}) < 10)
for: 30s
labels:
  severity: critical
annotations:
  description: Caddy container is down for more than 30 seconds.
  summary: Caddy down
Labels | State | Active Since | Value
alertname="caddy_down" severity="critical" | firing | 2025-12-02 15:37:36.11869088 +0000 UTC | 1
graphnode_down (1 active)
alert: graphnode_down
expr: absent((time() - container_last_seen{name="graph-node"}) < 10)
for: 30s
labels:
  severity: critical
annotations:
  description: Graph Node container is down for more than 30 seconds.
  summary: Graph Node down
Labels | State | Active Since | Value
alertname="graphnode_down" severity="critical" | firing | 2025-12-02 15:37:36.11869088 +0000 UTC | 1
postgres_down (1 active)
alert: postgres_down
expr: absent((time() - container_last_seen{name="postgres"}) < 10)
for: 30s
labels:
  severity: critical
annotations:
  description: Postgres container is down for more than 30 seconds.
  summary: Postgres down
Labels | State | Active Since | Value
alertname="postgres_down" severity="critical" | firing | 2025-12-02 15:37:36.11869088 +0000 UTC | 1
caddy_high_cpu (0 active)
alert: caddy_high_cpu
expr: sum(rate(container_cpu_usage_seconds_total{name="caddy"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
for: 30s
labels:
  severity: warning
annotations:
  description: Caddy CPU usage is {{ humanize $value}}%.
  summary: Caddy high CPU usage
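
The high-CPU expression normalizes per-container CPU time against the whole host: rate(container_cpu_usage_seconds_total{name="caddy"}[1m]) is the CPU-seconds the container consumes per second, count(node_cpu_seconds_total{mode="system"}) counts one series per core (assuming a single monitored host), and multiplying by 100 turns the ratio into a percentage of total host capacity. A worked example with assumed numbers, not measurements from this deployment:

On a 4-core host:
  rate(container_cpu_usage_seconds_total{name="caddy"}[1m]) = 0.6   (0.6 CPU-seconds consumed per second)
  0.6 / 4 * 100 = 15                                                (15% of total host CPU)
  15 > 10, so caddy_high_cpu goes pending and fires once the condition has held for 30s.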
caddy_high_memory (0 active)
alert: caddy_high_memory
expr: sum(container_memory_usage_bytes{name="caddy"}) > 1.2e+09
for: 30s
labels:
  severity: warning
annotations:
  description: Caddy memory consumption is at {{ humanize $value}}.
  summary: Caddy high memory usage
graphnode_high_cpu (0 active)
alert: graphnode_high_cpu
expr: sum(rate(container_cpu_usage_seconds_total{name="graph-node"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
for: 30s
labels:
  severity: warning
annotations:
  description: Graph Node CPU usage is {{ humanize $value}}%.
  summary: Graph Node high CPU usage
graphnode_high_memory (0 active)
alert: graphnode_high_memory
expr: sum(container_memory_usage_bytes{name="graph-node"}) > 1.2e+09
for: 30s
labels:
  severity: warning
annotations:
  description: Graph Node memory consumption is at {{ humanize $value}}.
  summary: Graph Node high memory usage
nginx_down (0 active)
alert: nginx_down
expr: absent((time() - container_last_seen{name="nginx-proxy"}) < 10)
for: 30s
labels:
  severity: critical
annotations:
  description: Nginx container is down for more than 30 seconds.
  summary: Nginx down
nginx_high_cpu (0 active)
alert: nginx_high_cpu
expr: sum(rate(container_cpu_usage_seconds_total{name="nginx-proxy"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
for: 30s
labels:
  severity: warning
annotations:
  description: Nginx CPU usage is {{ humanize $value}}%.
  summary: Nginx high CPU usage
nginx_high_memory (0 active)
alert: nginx_high_memory
expr: sum(container_memory_usage_bytes{name="nginx-proxy"}) > 1.2e+09
for: 30s
labels:
  severity: warning
annotations:
  description: Nginx memory consumption is at {{ humanize $value}}.
  summary: Nginx high memory usage
postgres_high_cpu (0 active)
alert: postgres_high_cpu
expr: sum(rate(container_cpu_usage_seconds_total{name="postgres"}[1m])) / count(node_cpu_seconds_total{mode="system"}) * 100 > 10
for: 30s
labels:
  severity: warning
annotations:
  description: Postgres CPU usage is {{ humanize $value}}%.
  summary: Postgres high CPU usage
postgres_high_memory (0 active)
alert: postgres_high_memory
expr: sum(container_memory_usage_bytes{name="postgres"}) > 1.2e+09
for: 30s
labels:
  severity: warning
annotations:
  description: Postgres memory consumption is at {{ humanize $value}}.
  summary: Postgres high memory usage
/etc/prometheus/alert.rules > host
high_cpu_load (0 active)
alert: high_cpu_load
expr: node_load1 > 1.5
for: 30s
labels:
  severity: warning
annotations:
  description: Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
  summary: Server under high load
high_memory_load (0 active)
alert: high_memory_load
expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / sum(node_memory_MemTotal_bytes) * 100 > 85
for: 30s
labels:
  severity: warning
annotations:
  description: Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
  summary: Server memory is almost full
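
high_memory_load approximates used memory the usual way: total minus free, buffers, and page cache, expressed as a percentage of total. With illustrative values for a 16 GiB host (assumed numbers, not from this deployment):

  node_memory_MemTotal_bytes = 16 GiB
  node_memory_MemFree_bytes  =  1 GiB
  node_memory_Buffers_bytes  =  0.5 GiB
  node_memory_Cached_bytes   =  1.5 GiB
  used = (16 - (1 + 0.5 + 1.5)) / 16 * 100 = 81.25%   (below the 85% threshold, so no alert)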
high_storage_load (0 active)
alert: high_storage_load
expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
for: 30s
labels:
  severity: warning
annotations:
  description: Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
  summary: Server storage is almost full
/etc/prometheus/alert.rules > targets
monitor_service_down (9 active)
alert: monitor_service_down
expr: up == 0
for: 30s
labels:
  severity: critical
annotations:
  description: Service {{ $labels.instance }} is down.
  summary: Monitor service non-operational
Labels | State | Active Since | Value
alertname="monitor_service_down" instance="indexer-service-l2-1:7300" job="indexer-service-l2-1" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="nodeexporter:9100" job="nodeexporter-l2" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="indexer-service-l2-0:7300" job="indexer-service-l2-0" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="query-node-l2-2:8040" job="query-node-l2-2" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="index-node-l2-5:8040" job="index-node-l2-5" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="indexer-service-l2-2:7300" job="indexer-service-l2-2" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="pushgateway-l2:9091" job="pushgateway-l2" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="query-node-4:8040" job="query-node-4" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0
alertname="monitor_service_down" instance="query-node-3:8040" job="query-node-3" severity="critical" | firing | 2025-12-02 15:37:47.878296715 +0000 UTC | 0