Some checks failed
LNM Migration CI / build-runner (push) Has been cancelled
Ledger OpenAPI CI / deprecation-check (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Airgap Sealed CI Smoke / sealed-smoke (push) Has been cancelled
Ledger Packs CI / build-pack (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Ledger OpenAPI CI / validate-oas (push) Has been cancelled
Ledger OpenAPI CI / check-wellknown (push) Has been cancelled
Ledger Packs CI / verify-pack (push) Has been cancelled
LNM Migration CI / validate-metrics (push) Has been cancelled
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
58 lines
1.8 KiB
YAML
58 lines
1.8 KiB
YAML
# LNM Migration Alert Rules
# Prometheus alerting rules for linkset/advisory migrations

groups:
# Rules watching migration errors, backfill progress, linkset counts and
# the observations backlog.
- name: lnm-migration
  rules:
  # Per-second rate of lnm_migration_errors_total (5m window) above 0.1,
  # sustained for 5 minutes.
  - alert: LnmMigrationErrorRate
    expr: rate(lnm_migration_errors_total[5m]) > 0.1
    for: 5m
    labels:
      severity: warning
      team: concelier
    annotations:
      summary: 'LNM migration error rate elevated'
      description: 'Migration errors: {{ $value | printf "%.2f" }}/s'

  # lnm_backfill_processed_total did not increase over a 10m window while
  # lnm_backfill_running equals 1, sustained for a further 10 minutes.
  # NOTE(review): `and` only matches series whose label sets are identical;
  # confirm both metrics carry the same labels, otherwise this cannot fire.
  - alert: LnmBackfillStalled
    expr: increase(lnm_backfill_processed_total[10m]) == 0 and lnm_backfill_running == 1
    for: 10m
    labels:
      severity: critical
      team: concelier
    annotations:
      summary: 'LNM backfill stalled'
      description: 'No progress in 10 minutes while backfill is running'

  # Absolute gap between lnm_linksets_total and lnm_linksets_expected above
  # 100, sustained for 15 minutes.
  # $value is the result of the abs(...) expression, i.e. the size of the
  # gap rather than either count, so the description reports the gap.
  - alert: LnmLinksetCountMismatch
    expr: abs(lnm_linksets_total - lnm_linksets_expected) > 100
    for: 15m
    labels:
      severity: warning
      team: concelier
    annotations:
      summary: 'Linkset count mismatch'
      description: 'Linkset count differs from expected by {{ $value }}'

  # lnm_observations_backlog above 10000, sustained for 5 minutes.
  - alert: LnmObservationsBacklogHigh
    expr: lnm_observations_backlog > 10000
    for: 5m
    labels:
      severity: warning
      team: excititor
    annotations:
      summary: 'Advisory observations backlog high'
      description: 'Backlog: {{ $value }} items'

# SLA rule for ingest-to-API latency.
- name: lnm-sla
  rules:
  # 0.95 quantile of lnm_ingest_to_api_latency_seconds (5m bucket rate)
  # above 30, sustained for 10 minutes.
  # NOTE(review): there is no `sum by (le)` around the rate, so the quantile
  # is evaluated per label set; confirm that is intended.
  - alert: LnmIngestToApiLatencyHigh
    expr: >-
      histogram_quantile(0.95,
      rate(lnm_ingest_to_api_latency_seconds_bucket[5m])) > 30
    for: 10m
    labels:
      team: platform
      severity: warning
    annotations:
      summary: 'Ingest to API latency exceeds SLA'
      description: 'P95 latency: {{ $value | printf "%.1f" }}s (SLA: 30s)'