# Prometheus alerting rules for the signals pipeline: scoring latency, cache
# health, sensor freshness, and ingestion failures.
groups:
  - name: signals-pipeline
    rules:
      # p95 of the reachability-scoring duration histogram, computed from
      # 5m bucket rates summed across all series by bucket boundary (le).
      - alert: SignalsScoringLatencyP95High
        expr: |
          histogram_quantile(
            0.95,
            sum(rate(signals_reachability_scoring_duration_seconds_bucket[5m])) by (le)
          ) > 2
        for: 10m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals scoring latency high (p95)"
          description: "Reachability scoring p95 exceeds 2s for 10m"

      # Miss ratio = misses / (hits + misses), per series, over a 5m window.
      # The denominator is deliberately NOT clamped: rate() is per-second, so
      # flooring it at 1 would turn the ratio into an absolute miss rate
      # whenever traffic is below 1 lookup/s and hide a high miss ratio.
      # When both rates are 0 the division yields NaN, which never satisfies
      # the comparison, so an idle cache does not fire.
      - alert: SignalsCacheMissRateHigh
        expr: |
          rate(signals_cache_misses_total[5m])
            /
          (rate(signals_cache_hits_total[5m]) + rate(signals_cache_misses_total[5m]))
          > 0.3
        for: 10m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals cache miss rate high"
          description: "Cache miss ratio >30% over 10m; investigate Redis or key churn."

      # Fires when the availability gauge has reported 0 for 2m.
      # NOTE(review): a missing signals_cache_available series produces no
      # result and therefore no alert - confirm that is acceptable.
      - alert: SignalsCacheDown
        expr: signals_cache_available == 0
        for: 2m
        labels:
          severity: critical
          service: signals
        annotations:
          summary: "Signals cache unavailable"
          description: "Redis cache reported unavailable for >2m"

      # Per sensor: seconds since the newest last-seen timestamp exceeds 900
      # (15 minutes). The condition must then hold for a further 5m to fire.
      - alert: SignalsSensorStaleness
        expr: time() - max(signals_sensor_last_seen_timestamp_seconds) by (sensor) > 900
        for: 5m
        labels:
          severity: warning
          service: signals
        annotations:
          summary: "Signals sensor stale"
          description: "No updates from sensor for >15 minutes"

      # Failure ratio = failures / total, per series, over a 5m window.
      # As with the cache miss ratio, the denominator is the real rate rather
      # than a value floored at 1/s, so the 5% threshold is a true ratio at
      # any traffic level. Zero traffic gives 0/0 = NaN, which does not fire.
      # NOTE(review): assumes both counters carry the same label set so the
      # one-to-one vector match succeeds - confirm against the exporter.
      - alert: SignalsIngestionErrorRate
        expr: |
          rate(signals_ingestion_failures_total[5m])
            /
          rate(signals_ingestion_total[5m])
          > 0.05
        for: 5m
        labels:
          severity: critical
          service: signals
        annotations:
          summary: "Signals ingestion failures elevated"
          description: "Ingestion failure ratio above 5% over 5m"