fix tests. new product advisories enhancements
This commit is contained in:
42
docs/notifications/operations/alerts/notify-slo-alerts.yaml
Normal file
42
docs/notifications/operations/alerts/notify-slo-alerts.yaml
Normal file
@@ -0,0 +1,42 @@
|
||||
---
# Notify SLO Alerts
# Prometheus alerting rules for the notification service.
#
# One rule group (notify-slo) with three alerts:
#   - NotifyDeliverySuccessSLO: delivery success ratio below 0.99
#   - NotifyBacklogDepth:       backlog depth above 10000
#   - NotifyLatencyP99:         P99 delivery duration above 5

groups:
  - name: notify-slo
    rules:
      # Fires when the ratio of successful deliveries to all deliveries,
      # both measured as 5-minute rates summed over every series, stays
      # below 0.99 for 5 minutes.
      # NOTE(review): with zero delivery traffic the division should yield
      # NaN, which never satisfies "< 0.99", so the alert stays silent
      # during a total outage of traffic - confirm this is intended.
      - alert: NotifyDeliverySuccessSLO
        expr: |
          (
            sum(rate(notify_delivery_success_total[5m])) /
            sum(rate(notify_delivery_total[5m]))
          ) < 0.99
        for: 5m
        labels:
          severity: critical
          service: notify
        annotations:
          summary: "Notification delivery success rate below SLO"
          description: "Current success rate: {{ $value | humanizePercentage }}"

      # Fires when notify_backlog_depth stays above 10000 for 10 minutes.
      # There is no aggregation, so each series of the metric is evaluated
      # (and alerts) on its own.
      - alert: NotifyBacklogDepth
        expr: notify_backlog_depth > 10000
        for: 10m
        labels:
          severity: warning
          service: notify
        annotations:
          summary: "Notification backlog depth high"
          description: "Current backlog: {{ $value }} notifications"

      # Fires when the 0.99 quantile of notify_delivery_duration_seconds,
      # estimated from 5-minute bucket rates aggregated by "le", stays
      # above 5 for 5 minutes (presumably seconds, going by the metric
      # name - confirm against the instrumentation).
      - alert: NotifyLatencyP99
        expr: |
          histogram_quantile(0.99,
            sum(rate(notify_delivery_duration_seconds_bucket[5m])) by (le)
          ) > 5
        for: 5m
        labels:
          severity: warning
          service: notify
        annotations:
          summary: "Notification delivery P99 latency high"
          description: "P99 latency: {{ $value | humanizeDuration }}"
|
||||
Reference in New Issue
Block a user