# Files
# git.stella-ops.org/docs/notifications/operations/alerts/notify-slo-alerts.yaml
#
# 43 lines
# 1.3 KiB
# YAML

# Notify SLO Alerts
# Prometheus alerting rules for the notification service
groups:
  # Rule group holding the notification-service SLO alerts. Every rule below
  # carries `service: notify`; `severity` distinguishes critical from warning.
  - name: notify-slo
    rules:
      # Delivery success ratio: the 5-minute rate of successful deliveries
      # divided by the 5-minute rate of all deliveries, each summed across
      # every series. Fires once the ratio has stayed below 0.99 for 5 minutes.
      # With no delivery traffic the ratio is NaN or empty, so the comparison
      # matches nothing and the alert stays silent.
      - alert: NotifyDeliverySuccessSLO
        expr: |
          (
            sum(rate(notify_delivery_success_total[5m])) /
            sum(rate(notify_delivery_total[5m]))
          ) < 0.99
        for: 5m
        labels:
          severity: critical
          service: notify
        annotations:
          summary: "Notification delivery success rate below SLO"
          # $value is the ratio itself (0-1), rendered as a percentage.
          description: "Current success rate: {{ $value | humanizePercentage }}"

      # Backlog depth: fires when `notify_backlog_depth` has stayed above
      # 10000 for 10 minutes. The expression is not aggregated, so each
      # matching series is evaluated (and alerts) on its own.
      - alert: NotifyBacklogDepth
        expr: notify_backlog_depth > 10000
        for: 10m
        labels:
          severity: warning
          service: notify
        annotations:
          summary: "Notification backlog depth high"
          description: "Current backlog: {{ $value }} notifications"

      # Delivery latency: the 0.99 quantile estimated from the
      # `notify_delivery_duration_seconds` histogram buckets, using 5-minute
      # bucket rates summed by `le` (all other labels are aggregated away).
      # Fires when the estimate has stayed above 5 for 5 minutes; the unit is
      # seconds going by the metric name.
      - alert: NotifyLatencyP99
        expr: |
          histogram_quantile(0.99,
            sum(rate(notify_delivery_duration_seconds_bucket[5m])) by (le)
          ) > 5
        for: 5m
        labels:
          severity: warning
          service: notify
        annotations:
          summary: "Notification delivery P99 latency high"
          description: "P99 latency: {{ $value | humanizeDuration }}"