# LNM Migration Alert Rules
# Prometheus alerting rules for linkset/advisory migrations

groups:
  # Alerts covering the migration/backfill process itself.
  - name: lnm-migration
    rules:
      # Fires when the per-second rate of lnm_migration_errors_total,
      # averaged over a 5m window, stays above 0.1 for 5 minutes.
      - alert: LnmMigrationErrorRate
        expr: rate(lnm_migration_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "LNM migration error rate elevated"
          description: "Migration errors: {{ $value | printf \"%.2f\" }}/s"

      # Fires when lnm_backfill_processed_total has not increased over a 10m
      # window while lnm_backfill_running equals 1, sustained for 10 minutes.
      # NOTE(review): `and` only matches series whose label sets are identical
      # on both sides - confirm both metrics carry the same labels, otherwise
      # this alert can never fire.
      - alert: LnmBackfillStalled
        expr: increase(lnm_backfill_processed_total[10m]) == 0 and lnm_backfill_running == 1
        for: 10m
        labels:
          severity: critical
          team: concelier
        annotations:
          summary: "LNM backfill stalled"
          description: "No progress in 10 minutes while backfill is running"

      # Fires when lnm_linksets_total and lnm_linksets_expected differ by more
      # than 100 (in either direction) for 15 minutes.
      # The alert value is the absolute difference, not the observed count, so
      # the description reports the difference.
      - alert: LnmLinksetCountMismatch
        expr: abs(lnm_linksets_total - lnm_linksets_expected) > 100
        for: 15m
        labels:
          severity: warning
          team: concelier
        annotations:
          summary: "Linkset count mismatch"
          description: "Linkset count differs from expected by {{ $value }} (threshold: 100)"

      # Fires when lnm_observations_backlog stays above 10000 for 5 minutes.
      - alert: LnmObservationsBacklogHigh
        expr: lnm_observations_backlog > 10000
        for: 5m
        labels:
          severity: warning
          team: excititor
        annotations:
          summary: "Advisory observations backlog high"
          description: "Backlog: {{ $value }} items"

  # Alerts covering the ingest-to-API latency SLA.
  - name: lnm-sla
    rules:
      # Fires when the 95th percentile of the ingest-to-API latency histogram
      # (5m rate) stays above 30 seconds for 10 minutes.
      # NOTE(review): buckets are not aggregated with `sum by (le)`, so the
      # quantile is evaluated per label set - confirm this is intended.
      - alert: LnmIngestToApiLatencyHigh
        expr: histogram_quantile(0.95, rate(lnm_ingest_to_api_latency_seconds_bucket[5m])) > 30
        for: 10m
        labels:
          severity: warning
          team: platform
        annotations:
          summary: "Ingest to API latency exceeds SLA"
          description: "P95 latency: {{ $value | printf \"%.1f\" }}s (SLA: 30s)"