up
Some checks failed
LNM Migration CI / build-runner (push) Has been cancelled
Ledger OpenAPI CI / deprecation-check (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Ledger Packs CI / build-pack (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Ledger OpenAPI CI / validate-oas (push) Has been cancelled
Ledger OpenAPI CI / check-wellknown (push) Has been cancelled
Ledger Packs CI / verify-pack (push) Has been cancelled
LNM Migration CI / validate-metrics (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Some checks failed
LNM Migration CI / build-runner (push) Has been cancelled
Ledger OpenAPI CI / deprecation-check (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Ledger Packs CI / build-pack (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Ledger OpenAPI CI / validate-oas (push) Has been cancelled
Ledger OpenAPI CI / check-wellknown (push) Has been cancelled
Ledger Packs CI / verify-pack (push) Has been cancelled
LNM Migration CI / validate-metrics (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
This commit is contained in:
57
ops/devops/lnm/alerts/lnm-alerts.yaml
Normal file
57
ops/devops/lnm/alerts/lnm-alerts.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
# LNM Migration Alert Rules
# Prometheus alerting rules for linkset/advisory migrations

groups:
  # Rules covering migration errors, backfill progress, linkset counts and
  # the observations backlog.
  - name: lnm-migration
    rules:
      # Per-second rate of lnm_migration_errors_total over a 5m window;
      # must stay above 0.1 for 5 minutes before the alert fires.
      - alert: LnmMigrationErrorRate
        expr: rate(lnm_migration_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "LNM migration error rate elevated"
          description: "Migration errors: {{ $value | printf \"%.2f\" }}/s"

      # The processed counter did not increase over the last 10m while
      # lnm_backfill_running reports 1. `and` keeps only series whose label
      # sets match on both sides.
      # NOTE(review): combined with `for: 10m`, the zero-progress condition
      # has to hold continuously for a further 10 minutes before firing.
      - alert: LnmBackfillStalled
        expr: increase(lnm_backfill_processed_total[10m]) == 0 and lnm_backfill_running == 1
        for: 10m
        labels:
          severity: critical
          team: concelier
        annotations:
          summary: "LNM backfill stalled"
          description: "No progress in 10 minutes while backfill is running"

      # Absolute gap between lnm_linksets_total and lnm_linksets_expected
      # exceeds 100 for 15 minutes.
      # $value is abs(total - expected), i.e. the size of the gap, not either
      # of the two counts, so the description reports the difference.
      - alert: LnmLinksetCountMismatch
        expr: abs(lnm_linksets_total - lnm_linksets_expected) > 100
        for: 15m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "Linkset count mismatch"
          description: "Linkset count differs from expected by {{ $value }} (threshold: 100)"

      # lnm_observations_backlog above 10000 for 5 minutes.
      - alert: LnmObservationsBacklogHigh
        expr: lnm_observations_backlog > 10000
        for: 5m
        labels:
          severity: warning
          team: excititor
        annotations:
          summary: "Advisory observations backlog high"
          description: "Backlog: {{ $value }} items"

  - name: lnm-sla
    rules:
      # 95th percentile estimated from the 5m rate of the latency histogram
      # buckets; alerts when it stays above 30 for 10 minutes.
      # The expression applies no aggregation, so the quantile is evaluated
      # per distinct label set of the bucket series.
      - alert: LnmIngestToApiLatencyHigh
        expr: histogram_quantile(0.95, rate(lnm_ingest_to_api_latency_seconds_bucket[5m])) > 30
        for: 10m
        labels:
          severity: warning
          team: platform
        annotations:
          summary: "Ingest to API latency exceeds SLA"
          description: "P95 latency: {{ $value | printf \"%.1f\" }}s (SLA: 30s)"
|
||||
Reference in New Issue
Block a user