up
Some checks failed
LNM Migration CI / build-runner (push) Has been cancelled
Ledger OpenAPI CI / deprecation-check (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Ledger Packs CI / build-pack (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Ledger OpenAPI CI / validate-oas (push) Has been cancelled
Ledger OpenAPI CI / check-wellknown (push) Has been cancelled
Ledger Packs CI / verify-pack (push) Has been cancelled
LNM Migration CI / validate-metrics (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-12-14 18:33:02 +02:00
parent d233fa3529
commit 2e70c9fdb6
51 changed files with 5958 additions and 75 deletions

View File

@@ -0,0 +1,57 @@
# LNM Migration Alert Rules
# Prometheus alerting rules for linkset/advisory migrations
groups:
  # Migration health: error rate, backfill progress, linkset counts, backlog.
  - name: lnm-migration
    rules:
      # Per-second migration error rate (averaged over 5m) above 0.1,
      # sustained for 5 minutes.
      - alert: LnmMigrationErrorRate
        expr: rate(lnm_migration_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "LNM migration error rate elevated"
          description: "Migration errors: {{ $value | printf \"%.2f\" }}/s"

      # The processed counter did not increase over the last 10m while
      # lnm_backfill_running reports 1. `for: 10m` requires that condition
      # to hold for a further 10 minutes before the alert fires.
      - alert: LnmBackfillStalled
        expr: increase(lnm_backfill_processed_total[10m]) == 0 and lnm_backfill_running == 1
        for: 10m
        labels:
          severity: critical
          team: concelier
        annotations:
          summary: "LNM backfill stalled"
          description: "No progress in 10 minutes while backfill is running"

      # Absolute difference between actual and expected linkset counts
      # exceeds 100 for 15 minutes.
      # $value here is that absolute difference (the result of the
      # expression), not either of the two counts, so the description
      # reports the delta rather than "expected X, got Y".
      - alert: LnmLinksetCountMismatch
        expr: abs(lnm_linksets_total - lnm_linksets_expected) > 100
        for: 15m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "Linkset count mismatch"
          description: "Linkset count differs from expected by {{ $value }} (threshold: 100)"

      # Observations backlog gauge above 10000 for 5 minutes.
      - alert: LnmObservationsBacklogHigh
        expr: lnm_observations_backlog > 10000
        for: 5m
        labels:
          severity: warning
          team: excititor
        annotations:
          summary: "Advisory observations backlog high"
          description: "Backlog: {{ $value }} items"

  # SLA tracking for the ingest-to-API path.
  - name: lnm-sla
    rules:
      # 95th-percentile ingest-to-API latency, estimated from histogram
      # buckets over a 5m rate window, above 30 seconds for 10 minutes.
      # NOTE(review): the quantile is computed per series because the
      # buckets are not aggregated with `sum by (le)` - confirm that
      # per-series evaluation is what the SLA intends.
      - alert: LnmIngestToApiLatencyHigh
        expr: histogram_quantile(0.95, rate(lnm_ingest_to_api_latency_seconds_bucket[5m])) > 30
        for: 10m
        labels:
          severity: warning
          team: platform
        annotations:
          summary: "Ingest to API latency exceeds SLA"
          description: "P95 latency: {{ $value | printf \"%.1f\" }}s (SLA: 30s)"